From 3b354016e9e23edc28bd4ca78f8714fdb006760e Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Wed, 4 Oct 2017 12:47:05 -0700 Subject: [PATCH 001/909] Rename SavedModelExporter to LatestExporter. PiperOrigin-RevId: 171048345 --- tensorflow/python/estimator/exporter.py | 2 +- tensorflow/python/estimator/exporter_test.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py index 621dece119..505820dd93 100644 --- a/tensorflow/python/estimator/exporter.py +++ b/tensorflow/python/estimator/exporter.py @@ -55,7 +55,7 @@ class Exporter(object): pass -class SavedModelExporter(Exporter): +class LatestExporter(Exporter): """This class exports the serving graph and checkpoints. In addition, the class also garbage collects stale exports. diff --git a/tensorflow/python/estimator/exporter_test.py b/tensorflow/python/estimator/exporter_test.py index 106202c9c2..2ceff1bfd6 100644 --- a/tensorflow/python/estimator/exporter_test.py +++ b/tensorflow/python/estimator/exporter_test.py @@ -30,14 +30,15 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import compat -class SavedModelExporterTest(test.TestCase): +class LatestExporterTest(test.TestCase): def test_error_out_if_exports_to_keep_is_zero(self): def _serving_input_fn(): pass + with self.assertRaisesRegexp(ValueError, "positive number"): - exporter_lib.SavedModelExporter( - name="saved_model_exporter", + exporter_lib.LatestExporter( + name="latest_exporter", serving_input_fn=_serving_input_fn, exports_to_keep=0) @@ -49,8 +50,8 @@ class SavedModelExporterTest(test.TestCase): export_dir_base = tempfile.mkdtemp() + "export/" gfile.MkDir(export_dir_base) - exporter = exporter_lib.SavedModelExporter( - name="saved_model_exporter", + exporter = exporter_lib.LatestExporter( + name="latest_exporter", serving_input_fn=_serving_input_fn, assets_extra={"from/path": "to/path"}, 
as_text=False, @@ -85,8 +86,8 @@ class SavedModelExporterTest(test.TestCase): def _serving_input_fn(): return array_ops.constant([1]), None - exporter = exporter_lib.SavedModelExporter( - name="saved_model_exporter", + exporter = exporter_lib.LatestExporter( + name="latest_exporter", serving_input_fn=_serving_input_fn, exports_to_keep=2) estimator = test.mock.Mock(spec=estimator_lib.Estimator) -- GitLab From 491584ff4dce4888227fc4227f81ffca12942534 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Wed, 4 Oct 2017 12:48:27 -0700 Subject: [PATCH 002/909] eager: Always run dataset iterator operations on CPU. It has no kernels for other devices. With an explicit "tf.device()" before invoking the kernel we ensure that Iterator.next() functions even when placed inside a: with tf.device("/device:GPU:0") PiperOrigin-RevId: 171048558 --- tensorflow/contrib/eager/python/datasets.py | 39 ++++++++++++--------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index 9973f4eee2..fb9fabd6c1 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -23,6 +23,7 @@ import threading from tensorflow.python.data.util import nest from tensorflow.python.eager import context from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import resource_variable_ops @@ -62,20 +63,22 @@ class Iterator(object): raise RuntimeError( "{} objects only make sense when eager execution is enabled".format( type(self))) - ds_variant = dataset._as_variant_tensor() # pylint: disable=protected-access - self._output_types = dataset.output_types - self._flat_output_types = nest.flatten(dataset.output_types) - self._flat_output_shapes = nest.flatten(dataset.output_shapes) - self._resource = gen_dataset_ops.iterator( - container="", - 
shared_name=_iterator_shared_name(), - output_types=self._flat_output_types, - output_shapes=self._flat_output_shapes) - gen_dataset_ops.make_iterator(ds_variant, self._resource) + with ops.device("/device:CPU:0"): + ds_variant = dataset._as_variant_tensor() # pylint: disable=protected-access + self._output_types = dataset.output_types + self._flat_output_types = nest.flatten(dataset.output_types) + self._flat_output_shapes = nest.flatten(dataset.output_shapes) + self._resource = gen_dataset_ops.iterator( + container="", + shared_name=_iterator_shared_name(), + output_types=self._flat_output_types, + output_shapes=self._flat_output_shapes) + gen_dataset_ops.make_iterator(ds_variant, self._resource) def __del__(self): if self._resource is not None: - resource_variable_ops.destroy_resource_op(self._resource) + with ops.device("/device:CPU:0"): + resource_variable_ops.destroy_resource_op(self._resource) self._resource = None def __iter__(self): @@ -87,10 +90,14 @@ class Iterator(object): def next(self): """Return the next tf.Tensor from the dataset.""" try: - ret = gen_dataset_ops.iterator_get_next( - self._resource, - output_types=self._flat_output_types, - output_shapes=self._flat_output_shapes) - return nest.pack_sequence_as(self._output_types, ret) + # TODO(ashankar): Consider removing this ops.device() contextmanager + # and instead mimic ops placement in graphs: Operations on resource + # handles execute on the same device as where the resource is placed. 
+ with ops.device("/device:CPU:0"): + ret = gen_dataset_ops.iterator_get_next( + self._resource, + output_types=self._flat_output_types, + output_shapes=self._flat_output_shapes) + return nest.pack_sequence_as(self._output_types, ret) except errors.OutOfRangeError: raise StopIteration -- GitLab From cf17ec96ed987386d73c645cd8b44aa32b7568b1 Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Wed, 4 Oct 2017 12:50:36 -0700 Subject: [PATCH 003/909] Add V2 versions of output window size computation functions for convolution. These V2 versions take arbitrary dilation rates. In preparation for the support of native cudnn dilated convolution. PiperOrigin-RevId: 171048878 --- tensorflow/core/framework/common_shape_fns.cc | 100 +++++++++++++++--- tensorflow/core/framework/common_shape_fns.h | 56 +++++++++- tensorflow/core/kernels/conv_grad_ops.cc | 79 ++++++++++---- tensorflow/core/kernels/conv_grad_ops.h | 8 ++ 4 files changed, 204 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index 92f9fd451b..4796c3c00a 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -17,24 +17,31 @@ limitations under the License. namespace tensorflow { -Status GetWindowedOutputSizeVerbose(int64 input_size, int64 filter_size, - int64 stride, Padding padding_type, - int64* output_size, int64* padding_before, - int64* padding_after) { +Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size, + int64 dilation_rate, int64 stride, + Padding padding_type, int64* output_size, + int64* padding_before, + int64* padding_after) { if (stride <= 0) { return errors::InvalidArgument("Stride must be > 0, but got ", stride); } + if (dilation_rate < 1) { + return errors::InvalidArgument("Dilation rate must be >= 1, but got ", + dilation_rate); + } - // See also the parallel implementation in GetWindowedOutputSizeFromDims. 
+ // See also the parallel implementation in GetWindowedOutputSizeFromDimsV2. + int64 effective_filter_size = (filter_size - 1) * dilation_rate + 1; switch (padding_type) { case Padding::VALID: - *output_size = (input_size - filter_size + stride) / stride; + *output_size = (input_size - effective_filter_size + stride) / stride; *padding_before = *padding_after = 0; break; case Padding::SAME: *output_size = (input_size + stride - 1) / stride; const int64 padding_needed = - std::max(0LL, (*output_size - 1) * stride + filter_size - input_size); + std::max(0LL, (*output_size - 1) * stride + effective_filter_size - + input_size); // For odd values of total padding, add more padding at the 'right' // side of the given dimension. *padding_before = padding_needed / 2; @@ -47,15 +54,35 @@ Status GetWindowedOutputSizeVerbose(int64 input_size, int64 filter_size, return Status::OK(); } +Status GetWindowedOutputSizeVerbose(int64 input_size, int64 filter_size, + int64 stride, Padding padding_type, + int64* output_size, int64* padding_before, + int64* padding_after) { + return GetWindowedOutputSizeVerboseV2(input_size, filter_size, + /*dilation_rate=*/1, stride, + padding_type, output_size, + padding_before, padding_after); +} + Status GetWindowedOutputSize(int64 input_size, int64 filter_size, int64 stride, Padding padding_type, int64* output_size, - int64* padding) { + int64* padding_size) { int64 padding_after_unused; return GetWindowedOutputSizeVerbose(input_size, filter_size, stride, - padding_type, output_size, padding, + padding_type, output_size, padding_size, &padding_after_unused); } +Status GetWindowedOutputSizeV2(int64 input_size, int64 filter_size, + int64 dilation_rate, int64 stride, + Padding padding_type, int64* output_size, + int64* padding_size) { + int64 padding_after_unused; + return GetWindowedOutputSizeVerboseV2(input_size, filter_size, dilation_rate, + stride, padding_type, output_size, + padding_size, &padding_after_unused); +} + Status 
Get3dOutputSize(const std::array& input, const std::array& window, const std::array& strides, @@ -69,34 +96,77 @@ Status Get3dOutputSize(const std::array& input, return Status::OK(); } +Status Get3dOutputSizeV2(const std::array& input, + const std::array& window, + const std::array& dilations, + const std::array& strides, + Padding padding_type, std::array* output_ptr, + std::array* padding_ptr) { + for (size_t i = 0; i < input.size(); ++i) { + TF_RETURN_IF_ERROR(GetWindowedOutputSizeV2( + input[i], window[i], dilations[i], strides[i], padding_type, + &(*output_ptr)[i], &(*padding_ptr)[i])); + } + return Status::OK(); +} + namespace shape_inference { -Status GetWindowedOutputSizeFromDims( +// The V2 version computes windowed output size with arbitrary dilation_rate, +// while the original version only handles the cases where dilation_rates equal +// to 1. +Status GetWindowedOutputSizeFromDimsV2( shape_inference::InferenceContext* c, shape_inference::DimensionHandle input_size, - shape_inference::DimensionOrConstant filter_size, int64 stride, - Padding padding_type, shape_inference::DimensionHandle* output_size) { + shape_inference::DimensionOrConstant filter_size, int64 dilation_rate, + int64 stride, Padding padding_type, + shape_inference::DimensionHandle* output_size) { if (stride <= 0) { return errors::InvalidArgument("Stride must be > 0, but got ", stride); } + if (dilation_rate < 1) { + return errors::InvalidArgument("Dilation rate must be >= 1, but got ", + dilation_rate); + } + // See also the parallel implementation in GetWindowedOutputSizeVerbose. 
switch (padding_type) { case Padding::VALID: - TF_RETURN_IF_ERROR(c->Subtract(input_size, filter_size, output_size)); + if (dilation_rate > 1) { + DimensionHandle window_size; + TF_RETURN_IF_ERROR( + c->Subtract(c->MakeDim(filter_size), 1, &window_size)); + TF_RETURN_IF_ERROR( + c->Multiply(window_size, dilation_rate, &window_size)); + TF_RETURN_IF_ERROR(c->Add(window_size, 1, &window_size)); + TF_RETURN_IF_ERROR(c->Subtract(input_size, window_size, output_size)); + } else { + TF_RETURN_IF_ERROR(c->Subtract(input_size, filter_size, output_size)); + } TF_RETURN_IF_ERROR(c->Add(*output_size, stride, output_size)); TF_RETURN_IF_ERROR(c->Divide(*output_size, stride, - false /* evenly_divisible */, output_size)); + /*evenly_divisible=*/false, output_size)); break; case Padding::SAME: TF_RETURN_IF_ERROR(c->Add(input_size, stride - 1, output_size)); TF_RETURN_IF_ERROR(c->Divide(*output_size, stride, - false /* evenly_divisible */, output_size)); + /*evenly_divisible=*/false, output_size)); break; } return Status::OK(); } +Status GetWindowedOutputSizeFromDims( + shape_inference::InferenceContext* c, + shape_inference::DimensionHandle input_size, + shape_inference::DimensionOrConstant filter_size, int64 stride, + Padding padding_type, shape_inference::DimensionHandle* output_size) { + return GetWindowedOutputSizeFromDimsV2(c, input_size, filter_size, + /*dilation_rate=*/1, stride, + padding_type, output_size); +} + Status UnchangedShape(shape_inference::InferenceContext* c) { c->set_output(0, c->input(0)); return Status::OK(); diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h index 88fea550a6..c0deb473a2 100644 --- a/tensorflow/core/framework/common_shape_fns.h +++ b/tensorflow/core/framework/common_shape_fns.h @@ -75,6 +75,32 @@ Status GetWindowedOutputSize(int64 input_size, int64 filter_size, int64 stride, Padding padding_type, int64* output_size, int64* padding_size); +// The V2 version computes the same outputs with 
arbitrary dilation_rate. +// The output dimensions are computed as follows: +// - When adding dilation_rate (D), we compute an effective filter size (K'): +// K' = (K - 1) * D + 1 +// - When Padding = SAME: the output size is (H'), where +// H' = ceil(float(H) / float(S)) +// where ceil is the ceiling function. The number of padded cells +// is computed as: +// Pc = ((H' - 1) * S + K' - H) / 2 +// When the stride is 1, the expression simplifies to +// H' = H, Pc = (K'-1)/2. +// This is where SAME comes from - the output has the same size as the input +// has. +// +// - When Padding = VALID: the output size is computed as +// H' = ceil(float(H - K' + 1) / float(S)) +// and the number of padded cells is always zero. +// When the stride is 1, the expression simplifies to +// H' = H-K'+1. +// +// TODO(b/67112639): Merge V2 versions and the original versions eventually. +Status GetWindowedOutputSizeV2(int64 input_size, int64 filter_size, + int64 dilation_rate, int64 stride, + Padding padding_type, int64* output_size, + int64* padding_size); + // Returns the same output dimensions as in GetWindowedOutputSize, but returns // verbose padding dimensions (before/after). Any excess padding // (caused by an odd padding size value) is added to the 'padding_after' @@ -84,6 +110,14 @@ Status GetWindowedOutputSizeVerbose(int64 input_size, int64 filter_size, int64* output_size, int64* padding_before, int64* padding_after); +// The V2 version computes the same outputs with arbitrary dilation_rate. For +// detailed equations, refer to the comments for GetWindowedOutputSizeV2(). +Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size, + int64 dilation_rate, int64 stride, + Padding padding_type, int64* output_size, + int64* padding_before, + int64* padding_after); + // Given an input tensor, kernel, stride and padding type, populates the 3D size // of the output tensor and padding to be applied to the input tensor at the // lower end of every dimension. 
Use for 3D convolutions, where the input data @@ -92,8 +126,17 @@ Status GetWindowedOutputSizeVerbose(int64 input_size, int64 filter_size, Status Get3dOutputSize(const std::array& input, const std::array& window, const std::array& strides, - Padding padding_type, std::array* output, - std::array* padding); + Padding padding_type, std::array* output_ptr, + std::array* padding_ptr); + +// The V2 version computes the same outputs with arbitrary dilation_rate. For +// detailed equations, refer to the comments for GetWindowedOutputSizeV2(). +Status Get3dOutputSizeV2(const std::array& input, + const std::array& window, + const std::array& dilations, + const std::array& strides, + Padding padding_type, std::array* output_ptr, + std::array* padding_ptr); namespace shape_inference { @@ -104,6 +147,15 @@ Status GetWindowedOutputSizeFromDims(InferenceContext* c, int64 stride, Padding padding_type, DimensionHandle* output_size); +// The V2 version computes the same outputs with arbitrary dilation_rate. For +// detailed equations, refer to the comments for GetWindowedOutputSizeV2(). +Status GetWindowedOutputSizeFromDimsV2(InferenceContext* c, + DimensionHandle input_size, + DimensionOrConstant filter_size, + int64 dilation_rate, int64 stride, + Padding padding_type, + DimensionHandle* output_size); + // Transfers shape of input(0) to output(0). Status UnchangedShape(shape_inference::InferenceContext* c); diff --git a/tensorflow/core/kernels/conv_grad_ops.cc b/tensorflow/core/kernels/conv_grad_ops.cc index 4c864c08a5..170ce31d17 100644 --- a/tensorflow/core/kernels/conv_grad_ops.cc +++ b/tensorflow/core/kernels/conv_grad_ops.cc @@ -23,6 +23,7 @@ limitations under the License. #include #include +#include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -40,46 +41,64 @@ limitations under the License. 
namespace tensorflow { -Status ConvBackpropExtractAndVerifyDimension( +// The V2 version computes windowed output size with arbitrary dilation_rate, +// while the original version only handles the cases where dilation_rates equal +// to 1. +Status ConvBackpropExtractAndVerifyDimensionV2( StringPiece label, const TensorShape& input_shape, const TensorShape& filter_shape, const TensorShape& output_shape, - const std::vector& strides, Padding padding, int spatial_dim, - int filter_spatial_dim, ConvBackpropSpatialDimension* dim) { + const gtl::ArraySlice& dilations, const std::vector& strides, + Padding padding, int spatial_dim, int filter_spatial_dim, + ConvBackpropSpatialDimension* dim) { dim->input_size = input_shape.dim_size(spatial_dim); dim->filter_size = filter_shape.dim_size(filter_spatial_dim); dim->output_size = output_shape.dim_size(spatial_dim); dim->stride = strides[spatial_dim]; + dim->dilation = dilations[spatial_dim]; int64 out_size = 0, pad_size = 0; - TF_RETURN_IF_ERROR(GetWindowedOutputSize(dim->input_size, dim->filter_size, - dim->stride, padding, &out_size, - &pad_size)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeV2(dim->input_size, dim->filter_size, + dim->dilation, dim->stride, + padding, &out_size, &pad_size)); if (dim->output_size != out_size) { return errors::InvalidArgument( label, ": Size of out_backprop doesn't match computed: ", "actual = ", - dim->output_size, ", computed = ", out_size); + dim->output_size, ", computed = ", out_size, + "spatial_dim: ", spatial_dim, " input: ", dim->input_size, + " filter: ", dim->filter_size, " output: ", dim->output_size, + " stride: ", dim->stride, " dilation: ", dim->dilation); } + int64 effective_filter_size = (dim->filter_size - 1) * dim->dilation + 1; dim->expanded_output_size = (dim->output_size - 1) * dim->stride + 1; - const auto padded_out_size = dim->input_size + dim->filter_size - 1; - dim->pad_before = dim->filter_size - 1 - pad_size; + const auto padded_out_size = dim->input_size + 
effective_filter_size - 1; + dim->pad_before = effective_filter_size - 1 - pad_size; dim->pad_after = padded_out_size - dim->expanded_output_size - dim->pad_before; VLOG(2) << label << ": expanded_out = " << dim->expanded_output_size - << ", filter = " << dim->filter_size + << ", effective_filter_size = " << effective_filter_size << ", padded_out = " << padded_out_size << ", pad_before = " << dim->pad_before << ", pad_after = " << dim->pad_after - << ", strides = " << dim->stride; + << ", dilation = " << dim->dilation << ", strides = " << dim->stride; return Status::OK(); } -Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims, - const TensorShape& input_shape, - const TensorShape& filter_shape, - const TensorShape& out_backprop_shape, - const std::vector& strides, - Padding padding, TensorFormat data_format, - ConvBackpropDimensions* dims) { +Status ConvBackpropExtractAndVerifyDimension( + StringPiece label, const TensorShape& input_shape, + const TensorShape& filter_shape, const TensorShape& output_shape, + const std::vector& strides, Padding padding, int spatial_dim, + int filter_spatial_dim, ConvBackpropSpatialDimension* dim) { + static constexpr std::array one_dilations = {{1, 1, 1, 1, 1}}; + return ConvBackpropExtractAndVerifyDimensionV2( + label, input_shape, filter_shape, output_shape, one_dilations, strides, + padding, spatial_dim, filter_spatial_dim, dim); +} + +Status ConvBackpropComputeDimensionsV2( + StringPiece label, int num_spatial_dims, const TensorShape& input_shape, + const TensorShape& filter_shape, const TensorShape& out_backprop_shape, + const gtl::ArraySlice& dilations, const std::vector& strides, + Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims) { // The + 2 in the following line is for the batch and feature dimensions. 
const int num_dims = num_spatial_dims + 2; if (input_shape.dims() != num_dims) { @@ -98,7 +117,10 @@ Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims, dims->batch_size = input_shape.dim_size(batch_dim); if (dims->batch_size != out_backprop_shape.dim_size(batch_dim)) { return errors::InvalidArgument( - label, ": input and out_backprop must have the same batch size"); + label, ": input and out_backprop must have the same batch size", + "input batch: ", dims->batch_size, + "outbackprop batch: ", out_backprop_shape.dim_size(batch_dim), + " batch_dim: ", batch_dim); } int feature_dim = GetTensorFeatureDimIndex(num_dims, data_format); @@ -118,11 +140,24 @@ Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims, dims->spatial_dims.resize(num_spatial_dims); for (int i = 0; i < num_spatial_dims; ++i) { int image_dim = GetTensorSpatialDimIndex(num_dims, data_format, i); - TF_RETURN_IF_ERROR(ConvBackpropExtractAndVerifyDimension( - label, input_shape, filter_shape, out_backprop_shape, strides, padding, - image_dim, i, &dims->spatial_dims[i])); + TF_RETURN_IF_ERROR(ConvBackpropExtractAndVerifyDimensionV2( + label, input_shape, filter_shape, out_backprop_shape, dilations, + strides, padding, image_dim, i, &dims->spatial_dims[i])); } return Status::OK(); } +Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims, + const TensorShape& input_shape, + const TensorShape& filter_shape, + const TensorShape& out_backprop_shape, + const std::vector& strides, + Padding padding, TensorFormat data_format, + ConvBackpropDimensions* dims) { + static constexpr std::array one_dilations = {{1, 1, 1, 1, 1}}; + return ConvBackpropComputeDimensionsV2( + label, num_spatial_dims, input_shape, filter_shape, out_backprop_shape, + one_dilations, strides, padding, data_format, dims); +} + } // namespace tensorflow diff --git a/tensorflow/core/kernels/conv_grad_ops.h b/tensorflow/core/kernels/conv_grad_ops.h index 
2926bb3a86..3a3492304b 100644 --- a/tensorflow/core/kernels/conv_grad_ops.h +++ b/tensorflow/core/kernels/conv_grad_ops.h @@ -212,6 +212,7 @@ struct ConvBackpropSpatialDimension { int64 filter_size; int64 output_size; int64 stride; + int64 dilation; int64 expanded_output_size; // Number of padding elements to be added before/after this dimension of @@ -242,6 +243,13 @@ Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims, Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims); +// The V2 version computes the same outputs with arbitrary dilation rate. +// TODO(b/67112639): Merge V2 versions and the original versions eventually. +Status ConvBackpropComputeDimensionsV2( + StringPiece label, int num_spatial_dims, const TensorShape& input_shape, + const TensorShape& filter_shape, const TensorShape& out_backprop_shape, + const std::vector& dilations, const std::vector& strides, + Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims); } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_CONV_GRAD_OPS_H_ -- GitLab From 3cf41b2edd4384a9df385430868dbdd887ecab86 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 13:07:44 -0700 Subject: [PATCH 004/909] Test save/restore variable from graph_callable. 
PiperOrigin-RevId: 171051237 --- tensorflow/contrib/eager/python/BUILD | 1 + tensorflow/contrib/eager/python/saver_test.py | 51 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index dd305a78dc..9185c963f7 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -81,6 +81,7 @@ cuda_py_test( "//tensorflow/python:array_ops", "//tensorflow/python:client", "//tensorflow/python:client_testlib", + "//tensorflow/python/eager:graph_callable", "//tensorflow/python:platform_test", "//tensorflow/python:variables", ], diff --git a/tensorflow/contrib/eager/python/saver_test.py b/tensorflow/contrib/eager/python/saver_test.py index cdec50ebd7..29af2b531f 100644 --- a/tensorflow/contrib/eager/python/saver_test.py +++ b/tensorflow/contrib/eager/python/saver_test.py @@ -21,10 +21,14 @@ import os from tensorflow.contrib.eager.python import saver as _saver from tensorflow.python.eager import context +from tensorflow.python.eager import graph_callable +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test @@ -87,6 +91,53 @@ class SaverTest(test.TestCase): with _saver.restore_variables_on_create(ckpt_prefix): _ = model(resource_variable_ops.ResourceVariable(1.0, name='v2')) + def testSaveRestoreGraphCallable(self): + with context.eager_mode(), ops.device(self._dev()): + @graph_callable.graph_callable( + [graph_callable.ShapeAndDtype(shape=(), dtype=dtypes.float32)]) + def model(x): + v = variable_scope.get_variable( + 'v', initializer=init_ops.zeros_initializer(), shape=()) + return v + x + + # Default 2 + 0 = 2 + self.assertEqual( + 2, 
model(array_ops.constant(2, dtype=dtypes.float32)).numpy()) + + # Save the variable value 0. + ckpt_prefix = os.path.join(test.get_temp_dir(), 'ckpt') + _saver.Saver(model.variables).save(ckpt_prefix) + + # update variable to 1, so that 2 + 1 = 3 + model.variables[0].assign(1.) + self.assertEqual( + 3, model(array_ops.constant(2, dtype=dtypes.float32)).numpy()) + + # load the variable value 0, so that 2 + 0 = 2 + _saver.Saver(model.variables).restore(ckpt_prefix) + self.assertEqual( + 2, model(array_ops.constant(2, dtype=dtypes.float32)).numpy()) + + # update checkpoint variable to 1 and memory value to 2. + model.variables[0].assign(1.) + _saver.Saver(model.variables).save(ckpt_prefix) + model.variables[0].assign(2.) + self.assertEqual( + 4, model(array_ops.constant(2, dtype=dtypes.float32)).numpy()) + + # reset the graph and reload on create, so that 1 + 2 = 3 + with ops.Graph().as_default(): + with _saver.restore_variables_on_create(ckpt_prefix): + @graph_callable.graph_callable( + [graph_callable.ShapeAndDtype(shape=(), dtype=dtypes.float32)]) + def model2(x): + v = variable_scope.get_variable( + 'v', initializer=init_ops.zeros_initializer(), shape=()) + return v + x + + self.assertEqual( + 3, model2(array_ops.constant(2, dtype=dtypes.float32)).numpy()) + if __name__ == '__main__': test.main() -- GitLab From ad69076ebd4c40226d0cd0f61ec1d4138d6bc46f Mon Sep 17 00:00:00 2001 From: Mustafa Ispir Date: Wed, 4 Oct 2017 13:14:04 -0700 Subject: [PATCH 005/909] Added get variable utils to tf.estimator.Estimator. 
PiperOrigin-RevId: 171052121 --- tensorflow/python/estimator/estimator.py | 35 ++++++++++++++++++ tensorflow/python/estimator/estimator_test.py | 37 +++++++++++++++++++ ...nsorflow.estimator.-d-n-n-classifier.pbtxt | 8 ++++ ...or.-d-n-n-linear-combined-classifier.pbtxt | 8 ++++ ...tor.-d-n-n-linear-combined-regressor.pbtxt | 8 ++++ ...ensorflow.estimator.-d-n-n-regressor.pbtxt | 8 ++++ .../tensorflow.estimator.-estimator.pbtxt | 8 ++++ ...sorflow.estimator.-linear-classifier.pbtxt | 8 ++++ ...nsorflow.estimator.-linear-regressor.pbtxt | 8 ++++ 9 files changed, 128 insertions(+) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index eee48419b0..1197366256 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -204,6 +204,34 @@ class Estimator(object): return public_model_fn + # TODO(ispir): support a list of names + def get_variable_value(self, name): + """Returns value of the variable given by name. + + Args: + name: string or a list of string, name of the tensor. + + Returns: + Numpy array - value of the tensor. + + Raises: + ValueError: If the Estimator has not produced a checkpoint yet. + """ + _check_checkpoint_available(self.model_dir) + return training.load_variable(self.model_dir, name) + + def get_variable_names(self): + """Returns list of all variable names in this model. + + Returns: + List of names. + + Raises: + ValueError: If the Estimator has not produced a checkpoint yet. + """ + _check_checkpoint_available(self.model_dir) + return [name for name, _ in training.list_variables(self.model_dir)] + def latest_checkpoint(self): """Finds the filename of latest saved checkpoint file in `model_dir`. 
@@ -818,6 +846,13 @@ class Estimator(object): return eval_results +def _check_checkpoint_available(model_dir): + latest_path = saver.latest_checkpoint(model_dir) + if not latest_path: + raise ValueError( + 'Could not find trained model in model_dir: {}.'.format(model_dir)) + + def _check_hooks_type(hooks): """Returns hooks if all are SessionRunHook, raises TypeError otherwise.""" hooks = list(hooks or []) diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index e532d3bd2b..cdffe3378f 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -862,6 +862,43 @@ class _StepCounterHook(session_run_hook.SessionRunHook): return self._steps +class EstimatorGetVariablesTest(test.TestCase): + + def test_model_should_be_trained(self): + + def _model_fn(features, labels, mode): + _, _ = features, labels + variables.Variable(1., name='one') + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=constant_op.constant(0.), + train_op=state_ops.assign_add(training.get_global_step(), 1)) + + est = estimator.Estimator(model_fn=_model_fn) + with self.assertRaisesRegexp(ValueError, 'not find trained model'): + est.get_variable_names() + with self.assertRaisesRegexp(ValueError, 'not find trained model'): + est.get_variable_value('one') + + def test_get_variable_utils(self): + + def _model_fn(features, labels, mode): + _, _ = features, labels + variables.Variable(1., name='one') + variables.Variable(3., name='three') + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=constant_op.constant(0.), + train_op=state_ops.assign_add(training.get_global_step(), 1)) + + est = estimator.Estimator(model_fn=_model_fn) + est.train(input_fn=dummy_input_fn, steps=1) + self.assertEqual( + set(['one', 'three', 'global_step']), set(est.get_variable_names())) + self.assertEqual(1., est.get_variable_value('one')) + self.assertEqual(3., est.get_variable_value('three')) + + class 
EstimatorEvaluateTest(test.TestCase): def test_input_fn_args(self): diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt index b54e8517c7..16e3b24615 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt @@ -31,6 +31,14 @@ tf_class { name: "export_savedmodel" argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "latest_checkpoint" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt index eb3a8eedbe..c6765ae277 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt @@ -31,6 +31,14 @@ tf_class { name: "export_savedmodel" argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" + } 
member_method { name: "latest_checkpoint" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt index 42003052f5..e3a820db46 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt @@ -31,6 +31,14 @@ tf_class { name: "export_savedmodel" argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "latest_checkpoint" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt index 32f5e8810a..a4c8cf6671 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt @@ -31,6 +31,14 @@ tf_class { name: "export_savedmodel" argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, 
keywords=None, defaults=None" + } member_method { name: "latest_checkpoint" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt index 78e1c75b13..787952eced 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt @@ -30,6 +30,14 @@ tf_class { name: "export_savedmodel" argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "latest_checkpoint" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt index cb3b5d01ff..99c03aa629 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt @@ -31,6 +31,14 @@ tf_class { name: "export_savedmodel" argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" + } member_method 
{ name: "latest_checkpoint" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt index e5d596887e..e2ab96d5b4 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt @@ -31,6 +31,14 @@ tf_class { name: "export_savedmodel" argspec: "args=[\'self\', \'export_dir_base\', \'serving_input_receiver_fn\', \'assets_extra\', \'as_text\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'None\'], " } + member_method { + name: "get_variable_names" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_variable_value" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "latest_checkpoint" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" -- GitLab From 6c954d0b3f02ea586a5fd3f9c2ea13bf8473d17f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 13:16:05 -0700 Subject: [PATCH 006/909] Adding TF Boosted trees regression example on boston dataset, minor fix for mnist example. PiperOrigin-RevId: 171052367 --- .../contrib/boosted_trees/examples/boston.py | 155 ++++++++++++++++++ .../contrib/boosted_trees/examples/mnist.py | 4 +- 2 files changed, 157 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/boosted_trees/examples/boston.py diff --git a/tensorflow/contrib/boosted_trees/examples/boston.py b/tensorflow/contrib/boosted_trees/examples/boston.py new file mode 100644 index 0000000000..0cb9e956ef --- /dev/null +++ b/tensorflow/contrib/boosted_trees/examples/boston.py @@ -0,0 +1,155 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Demonstrates a regression on Boston housing data. + + This example demonstrates how to run experiments with TF Boosted Trees on + a regression dataset. We split all the data into 20% test and 80% train, + and are using l2 loss and l2 regularization. + + Example Usage: + + python tensorflow/contrib/boosted_trees/examples/boston.py \ + --batch_size=404 --output_dir="/tmp/boston" --depth=4 --learning_rate=0.1 \ + --num_eval_steps=1 --num_trees=500 --l2=4 \ + --vmodule=training_ops=1 + + When training is done, mean squared error on eval data is reported. + Point tensorboard to the directory for the run to see how the training + progresses: + + tensorboard --logdir=/tmp/boston + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import tensorflow as tf +from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeRegressor +from tensorflow.contrib.boosted_trees.proto import learner_pb2 +from tensorflow.contrib.layers.python.layers import feature_column +from tensorflow.contrib.learn import learn_runner + +_TEST_SPLIT_RATIO = 0.2 +_TEST_SPLIT_SEED = 42 +_BOSTON_NUM_FEATURES = 13 + + +# Main config - creates a TF Boosted Trees Estimator based on flags. 
+def _get_tfbt(output_dir, feature_cols): + """Configures TF Boosted Trees estimator based on flags.""" + learner_config = learner_pb2.LearnerConfig() + + learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate + learner_config.regularization.l1 = 0.0 + # Set the regularization per instance in such a way that + # regularization for the full training data is equal to l2 flag. + learner_config.regularization.l2 = FLAGS.l2 / FLAGS.batch_size + learner_config.constraints.max_tree_depth = FLAGS.depth + learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE + + run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300) + + # Create a TF Boosted trees regression estimator. + estimator = GradientBoostedDecisionTreeRegressor( + learner_config=learner_config, + # For the WHOLE_TREE strategy, set the examples_per_layer to be equal to + # batch size. + examples_per_layer=FLAGS.batch_size, + feature_columns=feature_cols, + label_dimension=1, + model_dir=output_dir, + num_trees=FLAGS.num_trees, + center_bias=False, + config=run_config) + return estimator + + +def _make_experiment_fn(output_dir): + """Creates experiment for gradient boosted decision trees.""" + (x_train, y_train), (x_test, + y_test) = tf.keras.datasets.boston_housing.load_data() + + train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={"x": x_train}, + y=y_train, + batch_size=FLAGS.batch_size, + num_epochs=None, + shuffle=True) + + eval_input_fn = tf.estimator.inputs.numpy_input_fn( + x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False) + + feature_columns = [ + feature_column.real_valued_column("x", dimension=_BOSTON_NUM_FEATURES) + ] + + return tf.contrib.learn.Experiment( + estimator=_get_tfbt(output_dir, feature_columns), + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + train_steps=None, + eval_steps=FLAGS.num_eval_steps, + eval_metrics=None) + + +def main(unused_argv): + learn_runner.run( + experiment_fn=_make_experiment_fn, + 
output_dir=FLAGS.output_dir, + schedule="train_and_evaluate") + + +if __name__ == "__main__": + tf.logging.set_verbosity(tf.logging.INFO) + parser = argparse.ArgumentParser() + # Define the list of flags that users can change. + parser.add_argument( + "--batch_size", + type=int, + default=1000, + help="The batch size for reading data.") + parser.add_argument( + "--output_dir", + type=str, + required=True, + help="Choose the dir for the output.") + parser.add_argument( + "--num_eval_steps", + type=int, + default=1, + help="The number of steps to run evaluation for.") + # Flags for gradient boosted trees config. + parser.add_argument( + "--depth", type=int, default=4, help="Maximum depth of weak learners.") + parser.add_argument( + "--l2", type=float, default=1.0, help="l2 regularization per batch.") + parser.add_argument( + "--learning_rate", + type=float, + default=0.1, + help="Learning rate (shrinkage weight) with which each new tree is added." + ) + parser.add_argument( + "--num_trees", + type=int, + default=None, + required=True, + help="Number of trees to grow before stopping.") + + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/boosted_trees/examples/mnist.py b/tensorflow/contrib/boosted_trees/examples/mnist.py index 7e34d2f2d3..a3b1cb5154 100644 --- a/tensorflow/contrib/boosted_trees/examples/mnist.py +++ b/tensorflow/contrib/boosted_trees/examples/mnist.py @@ -129,8 +129,8 @@ def _get_tfbt(output_dir): def _make_experiment_fn(output_dir): """Creates experiment for gradient boosted decision trees.""" data = tf.contrib.learn.datasets.mnist.load_mnist() - train_input_fn = get_input_fn(data.train, batch_size=256) - eval_input_fn = get_input_fn(data.validation, batch_size=5000) + train_input_fn = get_input_fn(data.train, FLAGS.batch_size) + eval_input_fn = get_input_fn(data.validation, FLAGS.eval_batch_size) return tf.contrib.learn.Experiment( estimator=_get_tfbt(output_dir), -- 
GitLab From 15155493b941a28d2d9c1e1cb1ed5873612b360a Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 4 Oct 2017 13:26:11 -0700 Subject: [PATCH 007/909] Fast path for tf.conj when it should be pass-through. PiperOrigin-RevId: 171053662 --- tensorflow/python/ops/math_ops.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 131f3724eb..9383d72f14 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2317,6 +2317,10 @@ def conj(x, name=None): Raises: TypeError: If `x` is not a numeric tensor. """ + if isinstance(x, ops.Tensor): + dt = x.dtype + if dt.is_floating or dt.is_integer: + return x with ops.name_scope(name, "Conj", [x]) as name: x = ops.convert_to_tensor(x, name="x") if x.dtype.is_complex or x.dtype == dtypes.variant: -- GitLab From 2fe6cf285d2bf4222ea09f9e929e538b64bc376b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 13:26:47 -0700 Subject: [PATCH 008/909] Internal cleanup PiperOrigin-RevId: 171053770 --- tensorflow/python/eager/execute.py | 10 ++++++--- tensorflow/python/layers/base.py | 22 ++++++++++++++----- tensorflow/python/layers/normalization.py | 2 +- .../python/ops/resource_variable_ops.py | 12 ++-------- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index 8bb4c0687d..04634daba4 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -168,27 +168,31 @@ def make_tensor(v, arg_name): def args_to_matching_eager(l, ctx, default_dtype=None): """Convert sequence `l` to eager same-type Tensors.""" + EagerTensor = ops.EagerTensor # pylint: disable=invalid-name + if all(isinstance(x, EagerTensor) for x in l): + return l[0].dtype, l # TODO(josh11b): Could we do a better job if we also passed in the # allowed dtypes when that was known? 
# Is some input already a Tensor with a dtype? dtype = None for t in l: - if isinstance(t, ops.EagerTensor): + if isinstance(t, EagerTensor): dtype = t.dtype break + internal_convert_to_tensor = ops.internal_convert_to_tensor if dtype is None: # Infer a dtype based on the first value, and use that dtype for the # remaining values. ret = [] for t in l: - ret.append(ops.internal_convert_to_tensor( + ret.append(internal_convert_to_tensor( t, dtype, preferred_dtype=default_dtype, ctx=ctx)) if dtype is None: dtype = ret[-1].dtype else: - ret = [ops.internal_convert_to_tensor(t, dtype, ctx=ctx) for t in l] + ret = [internal_convert_to_tensor(t, dtype, ctx=ctx) for t in l] return dtype, ret diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 9e7cdd493f..1e11d1ae8d 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -112,8 +112,10 @@ class Layer(object): self._per_input_losses = {} self._per_input_updates = {} self._dtype = None if dtype is None else dtypes.as_dtype(dtype).name - self._compute_previous_mask = ('mask' in estimator_util.fn_args(self.call) - or hasattr(self, 'compute_mask')) + call_fn_args = estimator_util.fn_args(self.call) + self._compute_previous_mask = ('mask' in call_fn_args or + hasattr(self, 'compute_mask')) + self._call_has_scope_arg = 'scope' in call_fn_args # These lists will be filled via successive calls # to self._add_inbound_node(). @@ -555,7 +557,15 @@ class Layer(object): self.build(input_shapes[0]) else: self.build(input_shapes) - if 'scope' in estimator_util.fn_args(self.call): + try: + # Note: not all sub-classes of Layer call Layer.__init__ (especially + # the ones under tensorflow/python/keras). Hence we recompute this + # attribute here if it is not set. + # TODO(agarwal): Fix the sub-classes and avoid this complexity. 
+ call_has_scope_arg = self._call_has_scope_arg + except AttributeError: + call_has_scope_arg = 'scope' in estimator_util.fn_args(self.call) + if call_has_scope_arg: kwargs['scope'] = scope # Check input assumptions set after layer building, e.g. input shape. if in_graph_mode: @@ -1433,8 +1443,10 @@ class Network(Layer): self._activity_regularizer = None self._scope = next(vs.variable_scope(None, default_name=base_name).gen) self._base_name = base_name - self._compute_previous_mask = ('mask' in estimator_util.fn_args(self.call) - or hasattr(self, 'compute_mask')) + call_fn_args = estimator_util.fn_args(self.call) + self._compute_previous_mask = ('mask' in call_fn_args or + hasattr(self, 'compute_mask')) + self._call_has_scope_arg = 'scope' in call_fn_args # This acts just like the `trainable` attribute of any layer instance. # It does not affect users of the underlying layers, only users of the diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 0521129b27..ebcf397625 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -330,7 +330,7 @@ class BatchNormalization(base.Layer): lambda: self._one_minus_decay, lambda: 0.) else: - one_minus_decay = self._one_minus_decay + one_minus_decay = ops.convert_to_tensor(self._one_minus_decay) if training_value or training_value is None: mean_update = self._assign_moving_average(self.moving_mean, mean, one_minus_decay) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index bf4759e9ee..4ef9b05d51 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -540,16 +540,8 @@ class ResourceVariable(variables.Variable): the read operation. """ with ops.name_scope("Read"): - # In graph mode, ensure we read the variable in the same device as the - # handle. 
In eager mode, however, this sometimes tries to read a GPU - # variable in the CPU because the handle is host memory. For now, then, we - # need to skip the device block in eager. TODO(apassos): eager should have - # separate notions of device and memory, so handle.device can be GPU while - # handle.memory_space is always CPU. - if context.in_graph_mode(): - with ops.device(self._handle_device): - value = self._read_variable_op() - else: + # Ensure we read the variable in the same device as the handle. + with ops.device(self._handle_device): value = self._read_variable_op() # Return an identity so it can get placed on whatever device the context # specifies instead of the device where the variable is. -- GitLab From 083bd5dde5e6845a6f5e3b83ea2e074d7b28d61f Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Wed, 4 Oct 2017 13:33:07 -0700 Subject: [PATCH 009/909] Java: Add support for loading op libraries dynamically. This change adds the equivalent of tf.load_op_library in Python to Java. (https://github.com/tensorflow/tensorflow/commit/5c7f9e316d8c7735308a217310350d416d7498cc was required to make this possible) Though, TensorFlow.loadLibrary() is likely to fail on Windows as symbols required by custom op libraries (those exported by the tensorflow_framework library) are not exported by the monolithic JNI library yet. 
This should help with #10454 and #13476 PiperOrigin-RevId: 171054707 --- tensorflow/java/BUILD | 9 ++++- .../main/java/org/tensorflow/TensorFlow.java | 30 ++++++++++++++++ .../java/src/main/native/tensorflow_jni.cc | 35 +++++++++++++++++++ .../java/src/main/native/tensorflow_jni.h | 30 ++++++++++++++-- .../java/org/tensorflow/TensorFlowTest.java | 23 ++++++++++++ tensorflow/java/src/test/native/my_test_op.cc | 21 +++++++++++ 6 files changed, 145 insertions(+), 3 deletions(-) create mode 100644 tensorflow/java/src/test/native/my_test_op.cc diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 9de79af7d2..a380bc2c71 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -10,8 +10,9 @@ load(":src/gen/gen_ops.bzl", "tf_java_op_gen_srcjar") load( "//tensorflow:tensorflow.bzl", "tf_binary_additional_srcs", - "tf_copts", "tf_cc_binary", + "tf_copts", + "tf_custom_op_library", "tf_java_test", ) @@ -180,10 +181,16 @@ tf_java_test( ], ) +tf_custom_op_library( + name = "my_test_op.so", + srcs = ["src/test/native/my_test_op.cc"], +) + tf_java_test( name = "TensorFlowTest", size = "small", srcs = ["src/test/java/org/tensorflow/TensorFlowTest.java"], + data = [":my_test_op.so"], javacopts = JAVACOPTS, test_class = "org.tensorflow.TensorFlowTest", deps = [ diff --git a/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java b/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java index c21214b763..c90655f25d 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java +++ b/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java @@ -29,6 +29,36 @@ public final class TensorFlow { */ public static native byte[] registeredOpList(); + /** + * Load the dynamic library in filename and register the operations and kernels present in that + * library. + * + * @param filename Path of the dynamic library containing operations and kernels to load. 
+ * @return Serialized bytes of the OpList + * protocol buffer message defining the operations defined in the library. + * @throws UnsatisfiedLinkError if filename cannot be loaded. + */ + public static byte[] loadLibrary(String filename) { + long h = 0; + try { + h = libraryLoad(filename); + } catch (RuntimeException e) { + throw new UnsatisfiedLinkError(e.getMessage()); + } + try { + return libraryOpList(h); + } finally { + libraryDelete(h); + } + } + + private static native long libraryLoad(String filename); + + private static native void libraryDelete(long handle); + + private static native byte[] libraryOpList(long handle); + private TensorFlow() {} /** Load the TensorFlow runtime C library. */ diff --git a/tensorflow/java/src/main/native/tensorflow_jni.cc b/tensorflow/java/src/main/native/tensorflow_jni.cc index c553582e38..946ab502d1 100644 --- a/tensorflow/java/src/main/native/tensorflow_jni.cc +++ b/tensorflow/java/src/main/native/tensorflow_jni.cc @@ -14,7 +14,10 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/java/src/main/native/tensorflow_jni.h" + +#include #include "tensorflow/c/c_api.h" +#include "tensorflow/java/src/main/native/exception_jni.h" JNIEXPORT jstring JNICALL Java_org_tensorflow_TensorFlow_version(JNIEnv* env, jclass clazz) { @@ -30,3 +33,35 @@ Java_org_tensorflow_TensorFlow_registeredOpList(JNIEnv* env, jclass clazz) { TF_DeleteBuffer(buf); return ret; } + +JNIEXPORT jlong JNICALL Java_org_tensorflow_TensorFlow_libraryLoad( + JNIEnv* env, jclass clazz, jstring filename) { + TF_Status* status = TF_NewStatus(); + const char* cname = env->GetStringUTFChars(filename, nullptr); + TF_Library* h = TF_LoadLibrary(cname, status); + throwExceptionIfNotOK(env, status); + env->ReleaseStringUTFChars(filename, cname); + TF_DeleteStatus(status); + return reinterpret_cast(h); +} + +JNIEXPORT void JNICALL Java_org_tensorflow_TensorFlow_libraryDelete( + JNIEnv* env, jclass clazz, jlong handle) { + if (handle != 0) { + TF_DeleteLibraryHandle(reinterpret_cast(handle)); + } +} + +JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_TensorFlow_libraryOpList( + JNIEnv* env, jclass clazz, jlong handle) { + TF_Buffer buf = TF_GetOpList(reinterpret_cast(handle)); + if (buf.length > std::numeric_limits::max()) { + throwException(env, kIndexOutOfBoundsException, + "Serialized OpList is too large for a byte[] array"); + return nullptr; + } + auto ret_len = static_cast(buf.length); + jbyteArray ret = env->NewByteArray(ret_len); + env->SetByteArrayRegion(ret, 0, ret_len, static_cast(buf.data)); + return ret; +} diff --git a/tensorflow/java/src/main/native/tensorflow_jni.h b/tensorflow/java/src/main/native/tensorflow_jni.h index ecd9b15828..c0c9322020 100644 --- a/tensorflow/java/src/main/native/tensorflow_jni.h +++ b/tensorflow/java/src/main/native/tensorflow_jni.h @@ -27,7 +27,7 @@ extern "C" { * Method: version * Signature: ()Ljava/lang/String; */ -JNIEXPORT jstring JNICALL 
Java_org_tensorflow_TensorFlow_version(JNIEnv*, +JNIEXPORT jstring JNICALL Java_org_tensorflow_TensorFlow_version(JNIEnv *, jclass); /* @@ -36,7 +36,33 @@ JNIEXPORT jstring JNICALL Java_org_tensorflow_TensorFlow_version(JNIEnv*, * Signature: ()[B */ JNIEXPORT jbyteArray JNICALL -Java_org_tensorflow_TensorFlow_registeredOpList(JNIEnv*, jclass); +Java_org_tensorflow_TensorFlow_registeredOpList(JNIEnv *, jclass); + +/* + * Class: org_tensorflow_TensorFlow + * Method: libraryLoad + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_tensorflow_TensorFlow_libraryLoad(JNIEnv *, + jclass, + jstring); + +/* + * Class: org_tensorflow_TensorFlow + * Method: libraryDelete + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_tensorflow_TensorFlow_libraryDelete(JNIEnv *, + jclass, + jlong); + +/* + * Class: org_tensorflow_TensorFlow + * Method: libraryOpList + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL +Java_org_tensorflow_TensorFlow_libraryOpList(JNIEnv *, jclass, jlong); #ifdef __cplusplus } // extern "C" diff --git a/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java b/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java index a31ea900d1..b1fa3f0d7e 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java @@ -16,6 +16,7 @@ limitations under the License. package org.tensorflow; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import org.junit.Test; import org.junit.runner.RunWith; @@ -36,4 +37,26 @@ public class TensorFlowTest { // was not sorted out. Revisit? Till then, at least exercise the code. assertTrue(TensorFlow.registeredOpList().length > 0); } + + @Test + public void loadLibrary() { + // TODO(ashankar): This tell will fail when built with --config=monolithic. + // Figure out how we can ignore the test in that case. 
+ try (Graph g = new Graph()) { + // Build a graph with an unrecognized operation. + try { + g.opBuilder("MyTest", "MyTest").build(); + fail("should not be able to construct graphs with unregistered ops"); + } catch (IllegalArgumentException e) { + // expected exception + } + + // Load the library containing the operation. + byte[] opList = TensorFlow.loadLibrary("tensorflow/java/my_test_op.so"); + assertTrue(opList.length > 0); + + // Now graph building should succeed. + g.opBuilder("MyTest", "MyTest").build(); + } + } } diff --git a/tensorflow/java/src/test/native/my_test_op.cc b/tensorflow/java/src/test/native/my_test_op.cc new file mode 100644 index 0000000000..eb755901ed --- /dev/null +++ b/tensorflow/java/src/test/native/my_test_op.cc @@ -0,0 +1,21 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" + +REGISTER_OP("MyTest") + .Doc("Custom operation for testing.") + .SetShapeFn(tensorflow::shape_inference::UnknownShape); -- GitLab From d66e77f7c3ad4e5880af5ed3f287e472b6873f93 Mon Sep 17 00:00:00 2001 From: Mustafa Ispir Date: Wed, 4 Oct 2017 13:14:04 -0700 Subject: [PATCH 010/909] Added get variable utils to tf.estimator.Estimator. 
PiperOrigin-RevId: 171052121 --- .../contrib/boosted_trees/examples/boston.py | 155 ------------------ .../contrib/boosted_trees/examples/mnist.py | 4 +- tensorflow/java/BUILD | 9 +- .../main/java/org/tensorflow/TensorFlow.java | 30 ---- .../java/src/main/native/tensorflow_jni.cc | 35 ---- .../java/src/main/native/tensorflow_jni.h | 30 +--- .../java/org/tensorflow/TensorFlowTest.java | 23 --- tensorflow/java/src/test/native/my_test_op.cc | 21 --- tensorflow/python/eager/execute.py | 10 +- tensorflow/python/layers/base.py | 22 +-- tensorflow/python/layers/normalization.py | 2 +- tensorflow/python/ops/math_ops.py | 4 - .../python/ops/resource_variable_ops.py | 12 +- 13 files changed, 24 insertions(+), 333 deletions(-) delete mode 100644 tensorflow/contrib/boosted_trees/examples/boston.py delete mode 100644 tensorflow/java/src/test/native/my_test_op.cc diff --git a/tensorflow/contrib/boosted_trees/examples/boston.py b/tensorflow/contrib/boosted_trees/examples/boston.py deleted file mode 100644 index 0cb9e956ef..0000000000 --- a/tensorflow/contrib/boosted_trees/examples/boston.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Demonstrates a regression on Boston housing data. - - This example demonstrates how to run experiments with TF Boosted Trees on - a regression dataset. 
We split all the data into 20% test and 80% train, - and are using l2 loss and l2 regularization. - - Example Usage: - - python tensorflow/contrib/boosted_trees/examples/boston.py \ - --batch_size=404 --output_dir="/tmp/boston" --depth=4 --learning_rate=0.1 \ - --num_eval_steps=1 --num_trees=500 --l2=4 \ - --vmodule=training_ops=1 - - When training is done, mean squared error on eval data is reported. - Point tensorboard to the directory for the run to see how the training - progresses: - - tensorboard --logdir=/tmp/boston - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -import tensorflow as tf -from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeRegressor -from tensorflow.contrib.boosted_trees.proto import learner_pb2 -from tensorflow.contrib.layers.python.layers import feature_column -from tensorflow.contrib.learn import learn_runner - -_TEST_SPLIT_RATIO = 0.2 -_TEST_SPLIT_SEED = 42 -_BOSTON_NUM_FEATURES = 13 - - -# Main config - creates a TF Boosted Trees Estimator based on flags. -def _get_tfbt(output_dir, feature_cols): - """Configures TF Boosted Trees estimator based on flags.""" - learner_config = learner_pb2.LearnerConfig() - - learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate - learner_config.regularization.l1 = 0.0 - # Set the regularization per instance in such a way that - # regularization for the full training data is equal to l2 flag. - learner_config.regularization.l2 = FLAGS.l2 / FLAGS.batch_size - learner_config.constraints.max_tree_depth = FLAGS.depth - learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE - - run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300) - - # Create a TF Boosted trees regression estimator. 
- estimator = GradientBoostedDecisionTreeRegressor( - learner_config=learner_config, - # For the WHOLE_TREE strategy, set the examples_per_layer to be equal to - # batch size. - examples_per_layer=FLAGS.batch_size, - feature_columns=feature_cols, - label_dimension=1, - model_dir=output_dir, - num_trees=FLAGS.num_trees, - center_bias=False, - config=run_config) - return estimator - - -def _make_experiment_fn(output_dir): - """Creates experiment for gradient boosted decision trees.""" - (x_train, y_train), (x_test, - y_test) = tf.keras.datasets.boston_housing.load_data() - - train_input_fn = tf.estimator.inputs.numpy_input_fn( - x={"x": x_train}, - y=y_train, - batch_size=FLAGS.batch_size, - num_epochs=None, - shuffle=True) - - eval_input_fn = tf.estimator.inputs.numpy_input_fn( - x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False) - - feature_columns = [ - feature_column.real_valued_column("x", dimension=_BOSTON_NUM_FEATURES) - ] - - return tf.contrib.learn.Experiment( - estimator=_get_tfbt(output_dir, feature_columns), - train_input_fn=train_input_fn, - eval_input_fn=eval_input_fn, - train_steps=None, - eval_steps=FLAGS.num_eval_steps, - eval_metrics=None) - - -def main(unused_argv): - learn_runner.run( - experiment_fn=_make_experiment_fn, - output_dir=FLAGS.output_dir, - schedule="train_and_evaluate") - - -if __name__ == "__main__": - tf.logging.set_verbosity(tf.logging.INFO) - parser = argparse.ArgumentParser() - # Define the list of flags that users can change. - parser.add_argument( - "--batch_size", - type=int, - default=1000, - help="The batch size for reading data.") - parser.add_argument( - "--output_dir", - type=str, - required=True, - help="Choose the dir for the output.") - parser.add_argument( - "--num_eval_steps", - type=int, - default=1, - help="The number of steps to run evaluation for.") - # Flags for gradient boosted trees config. 
- parser.add_argument( - "--depth", type=int, default=4, help="Maximum depth of weak learners.") - parser.add_argument( - "--l2", type=float, default=1.0, help="l2 regularization per batch.") - parser.add_argument( - "--learning_rate", - type=float, - default=0.1, - help="Learning rate (shrinkage weight) with which each new tree is added." - ) - parser.add_argument( - "--num_trees", - type=int, - default=None, - required=True, - help="Number of trees to grow before stopping.") - - FLAGS, unparsed = parser.parse_known_args() - tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/boosted_trees/examples/mnist.py b/tensorflow/contrib/boosted_trees/examples/mnist.py index a3b1cb5154..7e34d2f2d3 100644 --- a/tensorflow/contrib/boosted_trees/examples/mnist.py +++ b/tensorflow/contrib/boosted_trees/examples/mnist.py @@ -129,8 +129,8 @@ def _get_tfbt(output_dir): def _make_experiment_fn(output_dir): """Creates experiment for gradient boosted decision trees.""" data = tf.contrib.learn.datasets.mnist.load_mnist() - train_input_fn = get_input_fn(data.train, FLAGS.batch_size) - eval_input_fn = get_input_fn(data.validation, FLAGS.eval_batch_size) + train_input_fn = get_input_fn(data.train, batch_size=256) + eval_input_fn = get_input_fn(data.validation, batch_size=5000) return tf.contrib.learn.Experiment( estimator=_get_tfbt(output_dir), diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index a380bc2c71..9de79af7d2 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -10,9 +10,8 @@ load(":src/gen/gen_ops.bzl", "tf_java_op_gen_srcjar") load( "//tensorflow:tensorflow.bzl", "tf_binary_additional_srcs", - "tf_cc_binary", "tf_copts", - "tf_custom_op_library", + "tf_cc_binary", "tf_java_test", ) @@ -181,16 +180,10 @@ tf_java_test( ], ) -tf_custom_op_library( - name = "my_test_op.so", - srcs = ["src/test/native/my_test_op.cc"], -) - tf_java_test( name = "TensorFlowTest", size = "small", srcs = 
["src/test/java/org/tensorflow/TensorFlowTest.java"], - data = [":my_test_op.so"], javacopts = JAVACOPTS, test_class = "org.tensorflow.TensorFlowTest", deps = [ diff --git a/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java b/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java index c90655f25d..c21214b763 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java +++ b/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java @@ -29,36 +29,6 @@ public final class TensorFlow { */ public static native byte[] registeredOpList(); - /** - * Load the dynamic library in filename and register the operations and kernels present in that - * library. - * - * @param filename Path of the dynamic library containing operations and kernels to load. - * @return Serialized bytes of the OpList - * protocol buffer message defining the operations defined in the library. - * @throws UnsatisfiedLinkError if filename cannot be loaded. - */ - public static byte[] loadLibrary(String filename) { - long h = 0; - try { - h = libraryLoad(filename); - } catch (RuntimeException e) { - throw new UnsatisfiedLinkError(e.getMessage()); - } - try { - return libraryOpList(h); - } finally { - libraryDelete(h); - } - } - - private static native long libraryLoad(String filename); - - private static native void libraryDelete(long handle); - - private static native byte[] libraryOpList(long handle); - private TensorFlow() {} /** Load the TensorFlow runtime C library. */ diff --git a/tensorflow/java/src/main/native/tensorflow_jni.cc b/tensorflow/java/src/main/native/tensorflow_jni.cc index 946ab502d1..c553582e38 100644 --- a/tensorflow/java/src/main/native/tensorflow_jni.cc +++ b/tensorflow/java/src/main/native/tensorflow_jni.cc @@ -14,10 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/java/src/main/native/tensorflow_jni.h" - -#include #include "tensorflow/c/c_api.h" -#include "tensorflow/java/src/main/native/exception_jni.h" JNIEXPORT jstring JNICALL Java_org_tensorflow_TensorFlow_version(JNIEnv* env, jclass clazz) { @@ -33,35 +30,3 @@ Java_org_tensorflow_TensorFlow_registeredOpList(JNIEnv* env, jclass clazz) { TF_DeleteBuffer(buf); return ret; } - -JNIEXPORT jlong JNICALL Java_org_tensorflow_TensorFlow_libraryLoad( - JNIEnv* env, jclass clazz, jstring filename) { - TF_Status* status = TF_NewStatus(); - const char* cname = env->GetStringUTFChars(filename, nullptr); - TF_Library* h = TF_LoadLibrary(cname, status); - throwExceptionIfNotOK(env, status); - env->ReleaseStringUTFChars(filename, cname); - TF_DeleteStatus(status); - return reinterpret_cast(h); -} - -JNIEXPORT void JNICALL Java_org_tensorflow_TensorFlow_libraryDelete( - JNIEnv* env, jclass clazz, jlong handle) { - if (handle != 0) { - TF_DeleteLibraryHandle(reinterpret_cast(handle)); - } -} - -JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_TensorFlow_libraryOpList( - JNIEnv* env, jclass clazz, jlong handle) { - TF_Buffer buf = TF_GetOpList(reinterpret_cast(handle)); - if (buf.length > std::numeric_limits::max()) { - throwException(env, kIndexOutOfBoundsException, - "Serialized OpList is too large for a byte[] array"); - return nullptr; - } - auto ret_len = static_cast(buf.length); - jbyteArray ret = env->NewByteArray(ret_len); - env->SetByteArrayRegion(ret, 0, ret_len, static_cast(buf.data)); - return ret; -} diff --git a/tensorflow/java/src/main/native/tensorflow_jni.h b/tensorflow/java/src/main/native/tensorflow_jni.h index c0c9322020..ecd9b15828 100644 --- a/tensorflow/java/src/main/native/tensorflow_jni.h +++ b/tensorflow/java/src/main/native/tensorflow_jni.h @@ -27,7 +27,7 @@ extern "C" { * Method: version * Signature: ()Ljava/lang/String; */ -JNIEXPORT jstring JNICALL 
Java_org_tensorflow_TensorFlow_version(JNIEnv *, +JNIEXPORT jstring JNICALL Java_org_tensorflow_TensorFlow_version(JNIEnv*, jclass); /* @@ -36,33 +36,7 @@ JNIEXPORT jstring JNICALL Java_org_tensorflow_TensorFlow_version(JNIEnv *, * Signature: ()[B */ JNIEXPORT jbyteArray JNICALL -Java_org_tensorflow_TensorFlow_registeredOpList(JNIEnv *, jclass); - -/* - * Class: org_tensorflow_TensorFlow - * Method: libraryLoad - * Signature: (Ljava/lang/String;)J - */ -JNIEXPORT jlong JNICALL Java_org_tensorflow_TensorFlow_libraryLoad(JNIEnv *, - jclass, - jstring); - -/* - * Class: org_tensorflow_TensorFlow - * Method: libraryDelete - * Signature: (J)V - */ -JNIEXPORT void JNICALL Java_org_tensorflow_TensorFlow_libraryDelete(JNIEnv *, - jclass, - jlong); - -/* - * Class: org_tensorflow_TensorFlow - * Method: libraryOpList - * Signature: (J)[B - */ -JNIEXPORT jbyteArray JNICALL -Java_org_tensorflow_TensorFlow_libraryOpList(JNIEnv *, jclass, jlong); +Java_org_tensorflow_TensorFlow_registeredOpList(JNIEnv*, jclass); #ifdef __cplusplus } // extern "C" diff --git a/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java b/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java index b1fa3f0d7e..a31ea900d1 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java @@ -16,7 +16,6 @@ limitations under the License. package org.tensorflow; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import org.junit.Test; import org.junit.runner.RunWith; @@ -37,26 +36,4 @@ public class TensorFlowTest { // was not sorted out. Revisit? Till then, at least exercise the code. assertTrue(TensorFlow.registeredOpList().length > 0); } - - @Test - public void loadLibrary() { - // TODO(ashankar): This tell will fail when built with --config=monolithic. - // Figure out how we can ignore the test in that case. 
- try (Graph g = new Graph()) { - // Build a graph with an unrecognized operation. - try { - g.opBuilder("MyTest", "MyTest").build(); - fail("should not be able to construct graphs with unregistered ops"); - } catch (IllegalArgumentException e) { - // expected exception - } - - // Load the library containing the operation. - byte[] opList = TensorFlow.loadLibrary("tensorflow/java/my_test_op.so"); - assertTrue(opList.length > 0); - - // Now graph building should succeed. - g.opBuilder("MyTest", "MyTest").build(); - } - } } diff --git a/tensorflow/java/src/test/native/my_test_op.cc b/tensorflow/java/src/test/native/my_test_op.cc deleted file mode 100644 index eb755901ed..0000000000 --- a/tensorflow/java/src/test/native/my_test_op.cc +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" - -REGISTER_OP("MyTest") - .Doc("Custom operation for testing.") - .SetShapeFn(tensorflow::shape_inference::UnknownShape); diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index 04634daba4..8bb4c0687d 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -168,31 +168,27 @@ def make_tensor(v, arg_name): def args_to_matching_eager(l, ctx, default_dtype=None): """Convert sequence `l` to eager same-type Tensors.""" - EagerTensor = ops.EagerTensor # pylint: disable=invalid-name - if all(isinstance(x, EagerTensor) for x in l): - return l[0].dtype, l # TODO(josh11b): Could we do a better job if we also passed in the # allowed dtypes when that was known? # Is some input already a Tensor with a dtype? dtype = None for t in l: - if isinstance(t, EagerTensor): + if isinstance(t, ops.EagerTensor): dtype = t.dtype break - internal_convert_to_tensor = ops.internal_convert_to_tensor if dtype is None: # Infer a dtype based on the first value, and use that dtype for the # remaining values. 
ret = [] for t in l: - ret.append(internal_convert_to_tensor( + ret.append(ops.internal_convert_to_tensor( t, dtype, preferred_dtype=default_dtype, ctx=ctx)) if dtype is None: dtype = ret[-1].dtype else: - ret = [internal_convert_to_tensor(t, dtype, ctx=ctx) for t in l] + ret = [ops.internal_convert_to_tensor(t, dtype, ctx=ctx) for t in l] return dtype, ret diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 1e11d1ae8d..9e7cdd493f 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -112,10 +112,8 @@ class Layer(object): self._per_input_losses = {} self._per_input_updates = {} self._dtype = None if dtype is None else dtypes.as_dtype(dtype).name - call_fn_args = estimator_util.fn_args(self.call) - self._compute_previous_mask = ('mask' in call_fn_args or - hasattr(self, 'compute_mask')) - self._call_has_scope_arg = 'scope' in call_fn_args + self._compute_previous_mask = ('mask' in estimator_util.fn_args(self.call) + or hasattr(self, 'compute_mask')) # These lists will be filled via successive calls # to self._add_inbound_node(). @@ -557,15 +555,7 @@ class Layer(object): self.build(input_shapes[0]) else: self.build(input_shapes) - try: - # Note: not all sub-classes of Layer call Layer.__init__ (especially - # the ones under tensorflow/python/keras). Hence we recompute this - # attribute here if it is not set. - # TODO(agarwal): Fix the sub-classes and avoid this complexity. - call_has_scope_arg = self._call_has_scope_arg - except AttributeError: - call_has_scope_arg = 'scope' in estimator_util.fn_args(self.call) - if call_has_scope_arg: + if 'scope' in estimator_util.fn_args(self.call): kwargs['scope'] = scope # Check input assumptions set after layer building, e.g. input shape. 
if in_graph_mode: @@ -1443,10 +1433,8 @@ class Network(Layer): self._activity_regularizer = None self._scope = next(vs.variable_scope(None, default_name=base_name).gen) self._base_name = base_name - call_fn_args = estimator_util.fn_args(self.call) - self._compute_previous_mask = ('mask' in call_fn_args or - hasattr(self, 'compute_mask')) - self._call_has_scope_arg = 'scope' in call_fn_args + self._compute_previous_mask = ('mask' in estimator_util.fn_args(self.call) + or hasattr(self, 'compute_mask')) # This acts just like the `trainable` attribute of any layer instance. # It does not affect users of the underlying layers, only users of the diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index ebcf397625..0521129b27 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -330,7 +330,7 @@ class BatchNormalization(base.Layer): lambda: self._one_minus_decay, lambda: 0.) else: - one_minus_decay = ops.convert_to_tensor(self._one_minus_decay) + one_minus_decay = self._one_minus_decay if training_value or training_value is None: mean_update = self._assign_moving_average(self.moving_mean, mean, one_minus_decay) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 9383d72f14..131f3724eb 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2317,10 +2317,6 @@ def conj(x, name=None): Raises: TypeError: If `x` is not a numeric tensor. 
""" - if isinstance(x, ops.Tensor): - dt = x.dtype - if dt.is_floating or dt.is_integer: - return x with ops.name_scope(name, "Conj", [x]) as name: x = ops.convert_to_tensor(x, name="x") if x.dtype.is_complex or x.dtype == dtypes.variant: diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 4ef9b05d51..bf4759e9ee 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -540,8 +540,16 @@ class ResourceVariable(variables.Variable): the read operation. """ with ops.name_scope("Read"): - # Ensure we read the variable in the same device as the handle. - with ops.device(self._handle_device): + # In graph mode, ensure we read the variable in the same device as the + # handle. In eager mode, however, this sometimes tries to read a GPU + # variable in the CPU because the handle is host memory. For now, then, we + # need to skip the device block in eager. TODO(apassos): eager should have + # separate notions of device and memory, so handle.device can be GPU while + # handle.memory_space is always CPU. + if context.in_graph_mode(): + with ops.device(self._handle_device): + value = self._read_variable_op() + else: value = self._read_variable_op() # Return an identity so it can get placed on whatever device the context # specifies instead of the device where the variable is. -- GitLab From c41dbc3c1832bc6c3662d4d942d095baa1fb49c9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 13:16:05 -0700 Subject: [PATCH 011/909] Adding TF Boosted trees regression example on boston dataset, minor fix for mnist example. 
PiperOrigin-RevId: 171052367 --- .../contrib/boosted_trees/examples/boston.py | 155 ++++++++++++++++++ .../contrib/boosted_trees/examples/mnist.py | 4 +- 2 files changed, 157 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/boosted_trees/examples/boston.py diff --git a/tensorflow/contrib/boosted_trees/examples/boston.py b/tensorflow/contrib/boosted_trees/examples/boston.py new file mode 100644 index 0000000000..0cb9e956ef --- /dev/null +++ b/tensorflow/contrib/boosted_trees/examples/boston.py @@ -0,0 +1,155 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Demonstrates a regression on Boston housing data. + + This example demonstrates how to run experiments with TF Boosted Trees on + a regression dataset. We split all the data into 20% test and 80% train, + and are using l2 loss and l2 regularization. + + Example Usage: + + python tensorflow/contrib/boosted_trees/examples/boston.py \ + --batch_size=404 --output_dir="/tmp/boston" --depth=4 --learning_rate=0.1 \ + --num_eval_steps=1 --num_trees=500 --l2=4 \ + --vmodule=training_ops=1 + + When training is done, mean squared error on eval data is reported. 
+ Point tensorboard to the directory for the run to see how the training + progresses: + + tensorboard --logdir=/tmp/boston + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import tensorflow as tf +from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeRegressor +from tensorflow.contrib.boosted_trees.proto import learner_pb2 +from tensorflow.contrib.layers.python.layers import feature_column +from tensorflow.contrib.learn import learn_runner + +_TEST_SPLIT_RATIO = 0.2 +_TEST_SPLIT_SEED = 42 +_BOSTON_NUM_FEATURES = 13 + + +# Main config - creates a TF Boosted Trees Estimator based on flags. +def _get_tfbt(output_dir, feature_cols): + """Configures TF Boosted Trees estimator based on flags.""" + learner_config = learner_pb2.LearnerConfig() + + learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate + learner_config.regularization.l1 = 0.0 + # Set the regularization per instance in such a way that + # regularization for the full training data is equal to l2 flag. + learner_config.regularization.l2 = FLAGS.l2 / FLAGS.batch_size + learner_config.constraints.max_tree_depth = FLAGS.depth + learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE + + run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300) + + # Create a TF Boosted trees regression estimator. + estimator = GradientBoostedDecisionTreeRegressor( + learner_config=learner_config, + # For the WHOLE_TREE strategy, set the examples_per_layer to be equal to + # batch size. 
+ examples_per_layer=FLAGS.batch_size, + feature_columns=feature_cols, + label_dimension=1, + model_dir=output_dir, + num_trees=FLAGS.num_trees, + center_bias=False, + config=run_config) + return estimator + + +def _make_experiment_fn(output_dir): + """Creates experiment for gradient boosted decision trees.""" + (x_train, y_train), (x_test, + y_test) = tf.keras.datasets.boston_housing.load_data() + + train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={"x": x_train}, + y=y_train, + batch_size=FLAGS.batch_size, + num_epochs=None, + shuffle=True) + + eval_input_fn = tf.estimator.inputs.numpy_input_fn( + x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False) + + feature_columns = [ + feature_column.real_valued_column("x", dimension=_BOSTON_NUM_FEATURES) + ] + + return tf.contrib.learn.Experiment( + estimator=_get_tfbt(output_dir, feature_columns), + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + train_steps=None, + eval_steps=FLAGS.num_eval_steps, + eval_metrics=None) + + +def main(unused_argv): + learn_runner.run( + experiment_fn=_make_experiment_fn, + output_dir=FLAGS.output_dir, + schedule="train_and_evaluate") + + +if __name__ == "__main__": + tf.logging.set_verbosity(tf.logging.INFO) + parser = argparse.ArgumentParser() + # Define the list of flags that users can change. + parser.add_argument( + "--batch_size", + type=int, + default=1000, + help="The batch size for reading data.") + parser.add_argument( + "--output_dir", + type=str, + required=True, + help="Choose the dir for the output.") + parser.add_argument( + "--num_eval_steps", + type=int, + default=1, + help="The number of steps to run evaluation for.") + # Flags for gradient boosted trees config. 
+ parser.add_argument( + "--depth", type=int, default=4, help="Maximum depth of weak learners.") + parser.add_argument( + "--l2", type=float, default=1.0, help="l2 regularization per batch.") + parser.add_argument( + "--learning_rate", + type=float, + default=0.1, + help="Learning rate (shrinkage weight) with which each new tree is added." + ) + parser.add_argument( + "--num_trees", + type=int, + default=None, + required=True, + help="Number of trees to grow before stopping.") + + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/boosted_trees/examples/mnist.py b/tensorflow/contrib/boosted_trees/examples/mnist.py index 7e34d2f2d3..a3b1cb5154 100644 --- a/tensorflow/contrib/boosted_trees/examples/mnist.py +++ b/tensorflow/contrib/boosted_trees/examples/mnist.py @@ -129,8 +129,8 @@ def _get_tfbt(output_dir): def _make_experiment_fn(output_dir): """Creates experiment for gradient boosted decision trees.""" data = tf.contrib.learn.datasets.mnist.load_mnist() - train_input_fn = get_input_fn(data.train, batch_size=256) - eval_input_fn = get_input_fn(data.validation, batch_size=5000) + train_input_fn = get_input_fn(data.train, FLAGS.batch_size) + eval_input_fn = get_input_fn(data.validation, FLAGS.eval_batch_size) return tf.contrib.learn.Experiment( estimator=_get_tfbt(output_dir), -- GitLab From cc8ee6c0f5270de5ef2baa0b21c44b0319813548 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 4 Oct 2017 13:26:11 -0700 Subject: [PATCH 012/909] Fast path for tf.conj when it should be pass-through. 
PiperOrigin-RevId: 171053662 --- tensorflow/python/ops/math_ops.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 131f3724eb..9383d72f14 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2317,6 +2317,10 @@ def conj(x, name=None): Raises: TypeError: If `x` is not a numeric tensor. """ + if isinstance(x, ops.Tensor): + dt = x.dtype + if dt.is_floating or dt.is_integer: + return x with ops.name_scope(name, "Conj", [x]) as name: x = ops.convert_to_tensor(x, name="x") if x.dtype.is_complex or x.dtype == dtypes.variant: -- GitLab From e7c53698e09f63e6268888d0b9ebe779ce28a1e7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 13:26:47 -0700 Subject: [PATCH 013/909] Internal cleanup PiperOrigin-RevId: 171053770 --- tensorflow/python/eager/execute.py | 10 ++++++--- tensorflow/python/layers/base.py | 22 ++++++++++++++----- tensorflow/python/layers/normalization.py | 2 +- .../python/ops/resource_variable_ops.py | 12 ++-------- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index 8bb4c0687d..04634daba4 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -168,27 +168,31 @@ def make_tensor(v, arg_name): def args_to_matching_eager(l, ctx, default_dtype=None): """Convert sequence `l` to eager same-type Tensors.""" + EagerTensor = ops.EagerTensor # pylint: disable=invalid-name + if all(isinstance(x, EagerTensor) for x in l): + return l[0].dtype, l # TODO(josh11b): Could we do a better job if we also passed in the # allowed dtypes when that was known? # Is some input already a Tensor with a dtype? 
dtype = None for t in l: - if isinstance(t, ops.EagerTensor): + if isinstance(t, EagerTensor): dtype = t.dtype break + internal_convert_to_tensor = ops.internal_convert_to_tensor if dtype is None: # Infer a dtype based on the first value, and use that dtype for the # remaining values. ret = [] for t in l: - ret.append(ops.internal_convert_to_tensor( + ret.append(internal_convert_to_tensor( t, dtype, preferred_dtype=default_dtype, ctx=ctx)) if dtype is None: dtype = ret[-1].dtype else: - ret = [ops.internal_convert_to_tensor(t, dtype, ctx=ctx) for t in l] + ret = [internal_convert_to_tensor(t, dtype, ctx=ctx) for t in l] return dtype, ret diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 9e7cdd493f..1e11d1ae8d 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -112,8 +112,10 @@ class Layer(object): self._per_input_losses = {} self._per_input_updates = {} self._dtype = None if dtype is None else dtypes.as_dtype(dtype).name - self._compute_previous_mask = ('mask' in estimator_util.fn_args(self.call) - or hasattr(self, 'compute_mask')) + call_fn_args = estimator_util.fn_args(self.call) + self._compute_previous_mask = ('mask' in call_fn_args or + hasattr(self, 'compute_mask')) + self._call_has_scope_arg = 'scope' in call_fn_args # These lists will be filled via successive calls # to self._add_inbound_node(). @@ -555,7 +557,15 @@ class Layer(object): self.build(input_shapes[0]) else: self.build(input_shapes) - if 'scope' in estimator_util.fn_args(self.call): + try: + # Note: not all sub-classes of Layer call Layer.__init__ (especially + # the ones under tensorflow/python/keras). Hence we recompute this + # attribute here if it is not set. + # TODO(agarwal): Fix the sub-classes and avoid this complexity. 
+ call_has_scope_arg = self._call_has_scope_arg + except AttributeError: + call_has_scope_arg = 'scope' in estimator_util.fn_args(self.call) + if call_has_scope_arg: kwargs['scope'] = scope # Check input assumptions set after layer building, e.g. input shape. if in_graph_mode: @@ -1433,8 +1443,10 @@ class Network(Layer): self._activity_regularizer = None self._scope = next(vs.variable_scope(None, default_name=base_name).gen) self._base_name = base_name - self._compute_previous_mask = ('mask' in estimator_util.fn_args(self.call) - or hasattr(self, 'compute_mask')) + call_fn_args = estimator_util.fn_args(self.call) + self._compute_previous_mask = ('mask' in call_fn_args or + hasattr(self, 'compute_mask')) + self._call_has_scope_arg = 'scope' in call_fn_args # This acts just like the `trainable` attribute of any layer instance. # It does not affect users of the underlying layers, only users of the diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 0521129b27..ebcf397625 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -330,7 +330,7 @@ class BatchNormalization(base.Layer): lambda: self._one_minus_decay, lambda: 0.) else: - one_minus_decay = self._one_minus_decay + one_minus_decay = ops.convert_to_tensor(self._one_minus_decay) if training_value or training_value is None: mean_update = self._assign_moving_average(self.moving_mean, mean, one_minus_decay) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index bf4759e9ee..4ef9b05d51 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -540,16 +540,8 @@ class ResourceVariable(variables.Variable): the read operation. """ with ops.name_scope("Read"): - # In graph mode, ensure we read the variable in the same device as the - # handle. 
In eager mode, however, this sometimes tries to read a GPU - # variable in the CPU because the handle is host memory. For now, then, we - # need to skip the device block in eager. TODO(apassos): eager should have - # separate notions of device and memory, so handle.device can be GPU while - # handle.memory_space is always CPU. - if context.in_graph_mode(): - with ops.device(self._handle_device): - value = self._read_variable_op() - else: + # Ensure we read the variable in the same device as the handle. + with ops.device(self._handle_device): value = self._read_variable_op() # Return an identity so it can get placed on whatever device the context # specifies instead of the device where the variable is. -- GitLab From 70fc9bf9b668adebe20ef6d1f7a0e182d7d02cc4 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Wed, 4 Oct 2017 13:33:07 -0700 Subject: [PATCH 014/909] Java: Add support for loading op libraries dynamically. This change adds the equivalent of tf.load_op_library in Python to Java. (https://github.com/tensorflow/tensorflow/commit/5c7f9e316d8c7735308a217310350d416d7498cc was required to make this possible) Though, TensorFlow.loadLibrary() is likely to fail on Windows as symbols required by custom op libraries (those exported by the tensorflow_framework library) are not exported by the monolithic JNI library yet. 
This should help with #10454 and #13476 PiperOrigin-RevId: 171054707 --- tensorflow/java/BUILD | 9 ++++- .../main/java/org/tensorflow/TensorFlow.java | 30 ++++++++++++++++ .../java/src/main/native/tensorflow_jni.cc | 35 +++++++++++++++++++ .../java/src/main/native/tensorflow_jni.h | 30 ++++++++++++++-- .../java/org/tensorflow/TensorFlowTest.java | 23 ++++++++++++ tensorflow/java/src/test/native/my_test_op.cc | 21 +++++++++++ 6 files changed, 145 insertions(+), 3 deletions(-) create mode 100644 tensorflow/java/src/test/native/my_test_op.cc diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 9de79af7d2..a380bc2c71 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -10,8 +10,9 @@ load(":src/gen/gen_ops.bzl", "tf_java_op_gen_srcjar") load( "//tensorflow:tensorflow.bzl", "tf_binary_additional_srcs", - "tf_copts", "tf_cc_binary", + "tf_copts", + "tf_custom_op_library", "tf_java_test", ) @@ -180,10 +181,16 @@ tf_java_test( ], ) +tf_custom_op_library( + name = "my_test_op.so", + srcs = ["src/test/native/my_test_op.cc"], +) + tf_java_test( name = "TensorFlowTest", size = "small", srcs = ["src/test/java/org/tensorflow/TensorFlowTest.java"], + data = [":my_test_op.so"], javacopts = JAVACOPTS, test_class = "org.tensorflow.TensorFlowTest", deps = [ diff --git a/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java b/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java index c21214b763..c90655f25d 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java +++ b/tensorflow/java/src/main/java/org/tensorflow/TensorFlow.java @@ -29,6 +29,36 @@ public final class TensorFlow { */ public static native byte[] registeredOpList(); + /** + * Load the dynamic library in filename and register the operations and kernels present in that + * library. + * + * @param filename Path of the dynamic library containing operations and kernels to load. 
+ * @return Serialized bytes of the OpList + * protocol buffer message defining the operations defined in the library. + * @throws UnsatisfiedLinkError if filename cannot be loaded. + */ + public static byte[] loadLibrary(String filename) { + long h = 0; + try { + h = libraryLoad(filename); + } catch (RuntimeException e) { + throw new UnsatisfiedLinkError(e.getMessage()); + } + try { + return libraryOpList(h); + } finally { + libraryDelete(h); + } + } + + private static native long libraryLoad(String filename); + + private static native void libraryDelete(long handle); + + private static native byte[] libraryOpList(long handle); + private TensorFlow() {} /** Load the TensorFlow runtime C library. */ diff --git a/tensorflow/java/src/main/native/tensorflow_jni.cc b/tensorflow/java/src/main/native/tensorflow_jni.cc index c553582e38..946ab502d1 100644 --- a/tensorflow/java/src/main/native/tensorflow_jni.cc +++ b/tensorflow/java/src/main/native/tensorflow_jni.cc @@ -14,7 +14,10 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/java/src/main/native/tensorflow_jni.h" + +#include #include "tensorflow/c/c_api.h" +#include "tensorflow/java/src/main/native/exception_jni.h" JNIEXPORT jstring JNICALL Java_org_tensorflow_TensorFlow_version(JNIEnv* env, jclass clazz) { @@ -30,3 +33,35 @@ Java_org_tensorflow_TensorFlow_registeredOpList(JNIEnv* env, jclass clazz) { TF_DeleteBuffer(buf); return ret; } + +JNIEXPORT jlong JNICALL Java_org_tensorflow_TensorFlow_libraryLoad( + JNIEnv* env, jclass clazz, jstring filename) { + TF_Status* status = TF_NewStatus(); + const char* cname = env->GetStringUTFChars(filename, nullptr); + TF_Library* h = TF_LoadLibrary(cname, status); + throwExceptionIfNotOK(env, status); + env->ReleaseStringUTFChars(filename, cname); + TF_DeleteStatus(status); + return reinterpret_cast(h); +} + +JNIEXPORT void JNICALL Java_org_tensorflow_TensorFlow_libraryDelete( + JNIEnv* env, jclass clazz, jlong handle) { + if (handle != 0) { + TF_DeleteLibraryHandle(reinterpret_cast(handle)); + } +} + +JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_TensorFlow_libraryOpList( + JNIEnv* env, jclass clazz, jlong handle) { + TF_Buffer buf = TF_GetOpList(reinterpret_cast(handle)); + if (buf.length > std::numeric_limits::max()) { + throwException(env, kIndexOutOfBoundsException, + "Serialized OpList is too large for a byte[] array"); + return nullptr; + } + auto ret_len = static_cast(buf.length); + jbyteArray ret = env->NewByteArray(ret_len); + env->SetByteArrayRegion(ret, 0, ret_len, static_cast(buf.data)); + return ret; +} diff --git a/tensorflow/java/src/main/native/tensorflow_jni.h b/tensorflow/java/src/main/native/tensorflow_jni.h index ecd9b15828..c0c9322020 100644 --- a/tensorflow/java/src/main/native/tensorflow_jni.h +++ b/tensorflow/java/src/main/native/tensorflow_jni.h @@ -27,7 +27,7 @@ extern "C" { * Method: version * Signature: ()Ljava/lang/String; */ -JNIEXPORT jstring JNICALL 
Java_org_tensorflow_TensorFlow_version(JNIEnv*, +JNIEXPORT jstring JNICALL Java_org_tensorflow_TensorFlow_version(JNIEnv *, jclass); /* @@ -36,7 +36,33 @@ JNIEXPORT jstring JNICALL Java_org_tensorflow_TensorFlow_version(JNIEnv*, * Signature: ()[B */ JNIEXPORT jbyteArray JNICALL -Java_org_tensorflow_TensorFlow_registeredOpList(JNIEnv*, jclass); +Java_org_tensorflow_TensorFlow_registeredOpList(JNIEnv *, jclass); + +/* + * Class: org_tensorflow_TensorFlow + * Method: libraryLoad + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_tensorflow_TensorFlow_libraryLoad(JNIEnv *, + jclass, + jstring); + +/* + * Class: org_tensorflow_TensorFlow + * Method: libraryDelete + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_tensorflow_TensorFlow_libraryDelete(JNIEnv *, + jclass, + jlong); + +/* + * Class: org_tensorflow_TensorFlow + * Method: libraryOpList + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL +Java_org_tensorflow_TensorFlow_libraryOpList(JNIEnv *, jclass, jlong); #ifdef __cplusplus } // extern "C" diff --git a/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java b/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java index a31ea900d1..b1fa3f0d7e 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/TensorFlowTest.java @@ -16,6 +16,7 @@ limitations under the License. package org.tensorflow; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import org.junit.Test; import org.junit.runner.RunWith; @@ -36,4 +37,26 @@ public class TensorFlowTest { // was not sorted out. Revisit? Till then, at least exercise the code. assertTrue(TensorFlow.registeredOpList().length > 0); } + + @Test + public void loadLibrary() { + // TODO(ashankar): This tell will fail when built with --config=monolithic. + // Figure out how we can ignore the test in that case. 
+ try (Graph g = new Graph()) { + // Build a graph with an unrecognized operation. + try { + g.opBuilder("MyTest", "MyTest").build(); + fail("should not be able to construct graphs with unregistered ops"); + } catch (IllegalArgumentException e) { + // expected exception + } + + // Load the library containing the operation. + byte[] opList = TensorFlow.loadLibrary("tensorflow/java/my_test_op.so"); + assertTrue(opList.length > 0); + + // Now graph building should succeed. + g.opBuilder("MyTest", "MyTest").build(); + } + } } diff --git a/tensorflow/java/src/test/native/my_test_op.cc b/tensorflow/java/src/test/native/my_test_op.cc new file mode 100644 index 0000000000..eb755901ed --- /dev/null +++ b/tensorflow/java/src/test/native/my_test_op.cc @@ -0,0 +1,21 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" + +REGISTER_OP("MyTest") + .Doc("Custom operation for testing.") + .SetShapeFn(tensorflow::shape_inference::UnknownShape); -- GitLab From 53cc63a2d96522ea182a7f6619e25664b1ae6b0d Mon Sep 17 00:00:00 2001 From: Dhananjay Nakrani Date: Wed, 4 Oct 2017 13:57:18 -0700 Subject: [PATCH 015/909] [part 1] Add support for int32 & int64 in RandomPoissonOp. 
This computes int32/int64-precision poisson samples with double precision intermediate calculations (same as it's done for `half`) respectively. part 2 will switch over python calls to new op once forward compatibility period has passed. PiperOrigin-RevId: 171058336 --- tensorflow/core/kernels/random_poisson_op.cc | 75 ++++++++++++++----- tensorflow/core/kernels/random_poisson_op.h | 2 +- tensorflow/core/ops/random_ops.cc | 46 ++++++++++++ .../kernel_tests/random_poisson_test.py | 19 +++++ 4 files changed, 122 insertions(+), 20 deletions(-) diff --git a/tensorflow/core/kernels/random_poisson_op.cc b/tensorflow/core/kernels/random_poisson_op.cc index b3957cbed6..3f635dbbaf 100644 --- a/tensorflow/core/kernels/random_poisson_op.cc +++ b/tensorflow/core/kernels/random_poisson_op.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include +#include #include #include "tensorflow/core/framework/op_kernel.h" @@ -69,34 +70,42 @@ struct PoissonComputeType { typedef float ComputeType; }; +template <> +struct PoissonComputeType { + typedef double ComputeType; +}; + +template <> +struct PoissonComputeType { + typedef double ComputeType; +}; + } // namespace namespace functor { -template +template struct PoissonFunctor { void operator()(OpKernelContext* ctx, const Device& d, const T* rate_flat, int num_rate, int num_samples, - const random::PhiloxRandom& rng, T* samples_flat); + const random::PhiloxRandom& rng, U* samples_flat); }; -template -struct PoissonFunctor { +template +struct PoissonFunctor { void operator()(OpKernelContext* ctx, const CPUDevice& d, const T* rate_flat, int num_rate, int num_samples, - const random::PhiloxRandom& rng, T* samples_flat) { + const random::PhiloxRandom& rng, U* samples_flat) { // Two different algorithms are employed, depending on the size of // rate. // If rate < 10, we use an algorithm attributed to Knuth: // Seminumerical Algorithms. Art of Computer Programming, Volume 2. 
// // This algorithm runs in O(rate) time, and will require O(rate) - // uniform - // variates. + // uniform variates. // // If rate >= 10 we use a transformation-rejection algorithm from - // pairs - // of uniform random variables due to Hormann. + // pairs of uniform random variables due to Hormann. // http://www.sciencedirect.com/science/article/pii/0167668793909974 // // The algorithm has an acceptance rate of ~89% for the smallest rate @@ -154,8 +163,9 @@ struct PoissonFunctor { while (true) { UNIFORM(u); prod = prod * u; - if (prod <= exp_neg_rate) { - samples_rate_output[sample_idx * num_rate] = T(x); + if (prod <= exp_neg_rate && + x <= CT(Eigen::NumTraits::highest())) { + samples_rate_output[sample_idx * num_rate] = U(x); break; } x += 1; @@ -216,13 +226,18 @@ struct PoissonFunctor { CT k = Eigen::numext::floor((CT(2) * a / u_shifted + b) * u + rate + CT(0.43)); + if (k > CT(Eigen::NumTraits::highest())) { + // retry in case of overflow. + continue; + } + // When alpha * f(G(U)) * G'(U) is close to 1, it is possible to // find a rectangle (-u_r, u_r) x (0, v_r) under the curve, such // that if v <= v_r and |u| <= u_r, then we can accept. // Here v_r = 0.9227 - 3.6224 / (b - 2) and u_r = 0.43. if (u_shifted >= CT(0.07) && v <= CT(0.9277) - CT(3.6224) / (b - CT(2))) { - samples_rate_output[sample_idx * num_rate] = T(k); + samples_rate_output[sample_idx * num_rate] = U(k); break; } @@ -235,7 +250,7 @@ struct PoissonFunctor { CT s = log(v * inv_alpha / (a / (u_shifted * u_shifted) + b)); CT t = -rate + k * log_rate - Eigen::numext::lgamma(k + 1); if (s <= t) { - samples_rate_output[sample_idx * num_rate] = T(k); + samples_rate_output[sample_idx * num_rate] = U(k); break; } } @@ -280,7 +295,7 @@ struct PoissonFunctor { namespace { // Samples from one or more Poisson distributions. 
-template +template class RandomPoissonOp : public OpKernel { public: explicit RandomPoissonOp(OpKernelConstruction* context) : OpKernel(context) { @@ -303,13 +318,13 @@ class RandomPoissonOp : public OpKernel { const auto rate_flat = rate_t.flat().data(); const int64 num_rate = rate_t.NumElements(); - auto samples_flat = samples_t->flat().data(); + auto samples_flat = samples_t->flat().data(); random::PhiloxRandom rng = generator_.ReserveRandomOutputs( num_samples * num_rate, kReservedSamplesPerOutput); - functor::PoissonFunctor()(ctx, ctx->eigen_device(), - rate_flat, num_rate, num_samples, - rng, samples_flat); + functor::PoissonFunctor()( + ctx, ctx->eigen_device(), rate_flat, num_rate, num_samples, + rng, samples_flat); } private: @@ -324,12 +339,34 @@ class RandomPoissonOp : public OpKernel { #define REGISTER(TYPE) \ REGISTER_KERNEL_BUILDER( \ Name("RandomPoisson").Device(DEVICE_CPU).TypeConstraint("dtype"), \ - RandomPoissonOp); + RandomPoissonOp); TF_CALL_half(REGISTER); TF_CALL_float(REGISTER); TF_CALL_double(REGISTER); +#define REGISTER_V2(RTYPE, OTYPE) \ + REGISTER_KERNEL_BUILDER(Name("RandomPoissonV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("R") \ + .TypeConstraint("dtype"), \ + RandomPoissonOp); + +#define REGISTER_ALL(RTYPE) \ + REGISTER_V2(RTYPE, Eigen::half); \ + REGISTER_V2(RTYPE, float); \ + REGISTER_V2(RTYPE, double); \ + REGISTER_V2(RTYPE, int32); \ + REGISTER_V2(RTYPE, int64); + +REGISTER_ALL(Eigen::half); +REGISTER_ALL(float); +REGISTER_ALL(double); +REGISTER_ALL(int32); +REGISTER_ALL(int64); + +#undef REGISTER_ALL +#undef REGISTER_V2 #undef REGISTER } // end namespace tensorflow diff --git a/tensorflow/core/kernels/random_poisson_op.h b/tensorflow/core/kernels/random_poisson_op.h index 6c49acc800..4e9fd62520 100644 --- a/tensorflow/core/kernels/random_poisson_op.h +++ b/tensorflow/core/kernels/random_poisson_op.h @@ -21,7 +21,7 @@ namespace tensorflow { namespace functor { // Generic helper functor for the Random Poisson Op. 
-template +template struct PoissonFunctor; } // namespace functor diff --git a/tensorflow/core/ops/random_ops.cc b/tensorflow/core/ops/random_ops.cc index 2e3fdc7c57..eee1ed1d2a 100644 --- a/tensorflow/core/ops/random_ops.cc +++ b/tensorflow/core/ops/random_ops.cc @@ -265,6 +265,8 @@ output: A tensor with shape `shape + shape(alpha)`. Each slice `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. )doc"); +// TODO(dhananayn): Deprecate RandomPoisson and switch over to RandomPoissonV2 +// after forward compatibility period has passed. REGISTER_OP("RandomPoisson") .SetIsStateful() .Input("shape: S") @@ -309,4 +311,48 @@ output: A tensor with shape `shape + shape(rate)`. Each slice rate. )doc"); +REGISTER_OP("RandomPoissonV2") + .SetIsStateful() + .Input("shape: S") + .Input("rate: R") + .Output("output: dtype") + .Attr("seed: int = 0") + .Attr("seed2: int = 0") + .Attr("S: {int32, int64}") + .Attr("R: {half, float, double, int32, int64} = DT_DOUBLE") + .Attr("dtype: {half, float, double, int32, int64} = DT_INT64") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle out; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &out)); + TF_RETURN_IF_ERROR(c->Concatenate(out, c->input(1), &out)); + c->set_output(0, out); + return Status::OK(); + }) + .Doc(R"doc( +Outputs random values from the Poisson distribution(s) described by rate. + +This op uses two algorithms, depending on rate. If rate >= 10, then +the algorithm by Hormann is used to acquire samples via +transformation-rejection. +See http://www.sciencedirect.com/science/article/pii/0167668793909974. + +Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform +random variables. +See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer +Programming, Volume 2. Addison Wesley + +shape: 1-D integer tensor. Shape of independent samples to draw from each + distribution described by the shape parameters given in rate. 
+rate: A tensor in which each scalar is a "rate" parameter describing the + associated poisson distribution. +seed: If either `seed` or `seed2` are set to be non-zero, the random number + generator is seeded by the given seed. Otherwise, it is seeded by a + random seed. +seed2: A second seed to avoid seed collision. + +output: A tensor with shape `shape + shape(rate)`. Each slice + `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for + `rate[i0, i1, ...iN]`. +)doc"); + } // namespace tensorflow diff --git a/tensorflow/python/kernel_tests/random_poisson_test.py b/tensorflow/python/kernel_tests/random_poisson_test.py index 107c9bbe14..ca57e380e8 100644 --- a/tensorflow/python/kernel_tests/random_poisson_test.py +++ b/tensorflow/python/kernel_tests/random_poisson_test.py @@ -20,9 +20,11 @@ from __future__ import print_function import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_random_ops from tensorflow.python.ops import random_ops from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging @@ -179,6 +181,23 @@ class RandomPoissonTest(test.TestCase): seed=12345) self.assertIs(None, rnd.get_shape().ndims) + def testDTypeCombinationsV2(self): + """Tests random_poisson_v2() for all supported dtype combinations.""" + # All supported dtypes by random_poisson_v2(). + supported_dtypes = [ + dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int32, + dtypes.int64 + ] + + with self.test_session(): + for lam_dt in supported_dtypes: + for out_dt in supported_dtypes: + # TODO(dhananjayn): Change this to use random_poisson() after + # switching it to RandomPoissonV2. 
+ gen_random_ops.random_poisson_v2( + [10], constant_op.constant([1], dtype=lam_dt), + dtype=out_dt).eval() + if __name__ == "__main__": test.main() -- GitLab From 3b4477000da27f4039ce275ad66f03e770c72a78 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 4 Oct 2017 14:29:09 -0700 Subject: [PATCH 016/909] Make VariantTensorData::tensors_size() const. PiperOrigin-RevId: 171063397 --- tensorflow/core/framework/variant_tensor_data.cc | 2 +- tensorflow/core/framework/variant_tensor_data.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/variant_tensor_data.cc b/tensorflow/core/framework/variant_tensor_data.cc index 93fac46e8e..82479193d2 100644 --- a/tensorflow/core/framework/variant_tensor_data.cc +++ b/tensorflow/core/framework/variant_tensor_data.cc @@ -28,7 +28,7 @@ VariantTensorData::VariantTensorData(const VariantTensorDataProto& proto) { VariantTensorData::~VariantTensorData() {} -int VariantTensorData::tensors_size() { return tensors_.size(); } +int VariantTensorData::tensors_size() const { return tensors_.size(); } const Tensor& VariantTensorData::tensors(int index) const { return tensors_[index]; diff --git a/tensorflow/core/framework/variant_tensor_data.h b/tensorflow/core/framework/variant_tensor_data.h index 4ee3df89fb..6e04879494 100644 --- a/tensorflow/core/framework/variant_tensor_data.h +++ b/tensorflow/core/framework/variant_tensor_data.h @@ -61,7 +61,7 @@ class VariantTensorData { } // Tensors contained within objects being serialized. - int tensors_size(); + int tensors_size() const; const Tensor& tensors(int index) const; std::vector tensors(); Tensor* add_tensors(); -- GitLab From 39565c0cbcd89a96a678e3453d3ab608d1293db1 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Wed, 4 Oct 2017 14:47:53 -0700 Subject: [PATCH 017/909] Publish train_and_evaluate and associated classes. 
PiperOrigin-RevId: 171066379 --- tensorflow/python/estimator/estimator_lib.py | 18 ++++++++ tensorflow/python/estimator/training.py | 4 -- .../tensorflow.estimator.-eval-spec.pbtxt | 43 +++++++++++++++++++ .../tensorflow.estimator.-exporter.pbtxt | 16 +++++++ ...ensorflow.estimator.-latest-exporter.pbtxt | 18 ++++++++ .../tensorflow.estimator.-train-spec.pbtxt | 27 ++++++++++++ .../api/golden/tensorflow.estimator.pbtxt | 20 +++++++++ 7 files changed, 142 insertions(+), 4 deletions(-) create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-eval-spec.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-exporter.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-latest-exporter.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-train-spec.pbtxt diff --git a/tensorflow/python/estimator/estimator_lib.py b/tensorflow/python/estimator/estimator_lib.py index 8e7d966564..a5b3faeffb 100644 --- a/tensorflow/python/estimator/estimator_lib.py +++ b/tensorflow/python/estimator/estimator_lib.py @@ -29,29 +29,47 @@ from tensorflow.python.estimator.canned.parsing_utils import classifier_parse_ex from tensorflow.python.estimator.canned.parsing_utils import regressor_parse_example_spec from tensorflow.python.estimator.estimator import Estimator from tensorflow.python.estimator.export import export_lib as export +from tensorflow.python.estimator.exporter import Exporter +from tensorflow.python.estimator.exporter import LatestExporter from tensorflow.python.estimator.inputs import inputs from tensorflow.python.estimator.model_fn import EstimatorSpec from tensorflow.python.estimator.model_fn import ModeKeys from tensorflow.python.estimator.run_config import RunConfig +from tensorflow.python.estimator.training import EvalSpec +from tensorflow.python.estimator.training import train_and_evaluate +from tensorflow.python.estimator.training import TrainSpec + from tensorflow.python.util.all_util import 
remove_undocumented # pylint: enable=unused-import,line-too-long,wildcard-import _allowed_symbols = [ + # Canned Estimators 'DNNClassifier', 'DNNRegressor', 'DNNLinearCombinedClassifier', 'DNNLinearCombinedRegressor', 'LinearClassifier', 'LinearRegressor', + + # I/O 'classifier_parse_example_spec', 'regressor_parse_example_spec', 'inputs', 'export', + + # Estimator 'Estimator', 'EstimatorSpec', 'ModeKeys', 'RunConfig', + + # Training utilities + 'train_and_evaluate', + 'EvalSpec', + 'TrainSpec', + 'Exporter', + 'LatestExporter', ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 953e970eea..1bed19760b 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -479,10 +479,6 @@ class _StopAtSecsHook(session_run_hook.SessionRunHook): run_context.request_stop() -class UnimplementedError(Exception): - pass - - class _TrainingExecutor(object): """The executor to run `Estimator` training and evaluation. 
diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-eval-spec.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-eval-spec.pbtxt new file mode 100644 index 0000000000..db83ba1bd8 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-eval-spec.pbtxt @@ -0,0 +1,43 @@ +path: "tensorflow.estimator.EvalSpec" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "exporters" + mtype: "" + } + member { + name: "hooks" + mtype: "" + } + member { + name: "input_fn" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "start_delay_secs" + mtype: "" + } + member { + name: "steps" + mtype: "" + } + member { + name: "throttle_secs" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-exporter.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-exporter.pbtxt new file mode 100644 index 0000000000..c69e4c7a30 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-exporter.pbtxt @@ -0,0 +1,16 @@ +path: "tensorflow.estimator.Exporter" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "name" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "export" + argspec: "args=[\'self\', \'estimator\', \'export_path\', \'checkpoint_path\', \'eval_result\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-latest-exporter.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-latest-exporter.pbtxt new file mode 100644 index 0000000000..c3f98f84b8 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-latest-exporter.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.estimator.LatestExporter" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "name" + mtype: "" + } + member_method { + name: 
"__init__" + argspec: "args=[\'self\', \'name\', \'serving_input_fn\', \'assets_extra\', \'as_text\', \'exports_to_keep\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'5\'], " + } + member_method { + name: "export" + argspec: "args=[\'self\', \'estimator\', \'export_path\', \'checkpoint_path\', \'eval_result\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-train-spec.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-train-spec.pbtxt new file mode 100644 index 0000000000..7d2f77438a --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-train-spec.pbtxt @@ -0,0 +1,27 @@ +path: "tensorflow.estimator.TrainSpec" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "hooks" + mtype: "" + } + member { + name: "input_fn" + mtype: "" + } + member { + name: "max_steps" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt index 07b04810b5..25e94a14a6 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt @@ -24,6 +24,18 @@ tf_module { name: "EstimatorSpec" mtype: "" } + member { + name: "EvalSpec" + mtype: "" + } + member { + name: "Exporter" + mtype: "" + } + member { + name: "LatestExporter" + mtype: "" + } member { name: "LinearClassifier" mtype: "" @@ -40,6 +52,10 @@ tf_module { name: "RunConfig" mtype: "" } + member { + name: "TrainSpec" + mtype: "" + } member { name: "export" mtype: "" @@ -56,4 +72,8 @@ tf_module { name: "regressor_parse_example_spec" argspec: "args=[\'feature_columns\', \'label_key\', \'label_dtype\', \'label_default\', \'label_dimension\', \'weight_column\'], varargs=None, keywords=None, defaults=[\"\", \'None\', \'1\', \'None\'], " } 
+ member_method { + name: "train_and_evaluate" + argspec: "args=[\'estimator\', \'train_spec\', \'eval_spec\'], varargs=None, keywords=None, defaults=None" + } } -- GitLab From 4486b4f69b55633274f7903158d680bf2e9eabff Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Wed, 4 Oct 2017 14:52:13 -0700 Subject: [PATCH 018/909] Make graph_callable compatible with functions that do not return anything PiperOrigin-RevId: 171067061 --- tensorflow/python/eager/graph_callable.py | 7 +++++- .../python/eager/graph_callable_test.py | 23 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index a6131bea08..5933da7865 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -324,7 +324,9 @@ def _graph_callable_internal(func, shape_and_dtypes): captures): func_outputs = func(*func_inputs) outputs_list = nest.flatten(func_outputs) - output_shapes = [x.shape for x in outputs_list if x is not None] + if len(outputs_list) == 1 and outputs_list[0] is None: + outputs_list = [] + output_shapes = [x.shape for x in outputs_list] if not all(isinstance(x, tf_ops.Tensor) for x in outputs_list): raise ValueError("Found non-tensor output in %s" % str(outputs_list)) initializing_operations = tmp_graph.get_operations() @@ -420,6 +422,9 @@ def graph_callable(shape_and_dtypes): Note that the wrapped function is not allowed to change the values of the variables, just use them. + The return value of the wrapped function must be one of the following: + (1) None, (2) a Tensor, or (3) a possibly nested sequence of Tensors. 
+ Example: ```python diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py index 54a1c73dfd..cee6adec04 100644 --- a/tensorflow/python/eager/graph_callable_test.py +++ b/tensorflow/python/eager/graph_callable_test.py @@ -45,6 +45,29 @@ class GraphCallableTest(test.TestCase): self.assertEqual( 3, my_function(constant_op.constant(2, dtype=dtypes.float32)).numpy()) + def testFunctionWithoutReturnValue(self): + + @graph_callable.graph_callable( + [graph_callable.ShapeAndDtype(shape=(), dtype=dtypes.float32)]) + def my_function(x): + v = variable_scope.get_variable( + "v", initializer=init_ops.zeros_initializer(), shape=()) + v.assign(x) + + my_function(constant_op.constant(4, dtype=dtypes.float32)) + self.assertEqual(4, my_function.variables[0].read_value().numpy()) + + def testFunctionWithoutReturnValueAndArgs(self): + + @graph_callable.graph_callable([]) + def my_function(): + v = variable_scope.get_variable( + "v", initializer=init_ops.zeros_initializer(), shape=()) + v.assign(4) + + my_function() + self.assertEqual(4, my_function.variables[0].read_value().numpy()) + def testVariableAPI(self): @graph_callable.graph_callable( -- GitLab From 89df2e336218f7f3ecf2c70f8478c64985345ded Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Wed, 4 Oct 2017 15:13:33 -0700 Subject: [PATCH 019/909] Add the 'is_the_final_export' signal to Exporters. Use them in training. When the training ends, the final export is performed via `Exporter.export()` call. That final export is going to have is_the_final_export parameter being set to true. If `TrainSpec.max_steps` is `None`, then "when training ends" is undefined. We are going to train forever. In that case, `is_the_final_export` is going to be always False. I added a note about it. 
PiperOrigin-RevId: 171070760 --- tensorflow/python/estimator/exporter.py | 26 ++++++- tensorflow/python/estimator/exporter_test.py | 41 +++++++++- tensorflow/python/estimator/training.py | 37 ++++++--- tensorflow/python/estimator/training_test.py | 81 ++++++++++++++++++++ 4 files changed, 169 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py index 505820dd93..2faca11f6e 100644 --- a/tensorflow/python/estimator/exporter.py +++ b/tensorflow/python/estimator/exporter.py @@ -40,7 +40,8 @@ class Exporter(object): pass @abc.abstractmethod - def export(self, estimator, export_path, checkpoint_path, eval_result): + def export(self, estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): """Exports the given `Estimator` to a specific format. Args: @@ -48,6 +49,13 @@ class Exporter(object): export_path: A string containing a directory where to write the export. checkpoint_path: The checkpoint path to export. eval_result: The output of `Estimator.evaluate` on this checkpoint. + is_the_final_export: This boolean is True when this is an export in the + end of training. It is False for the intermediate exports during + the training. + + When passing `Exporter` to `tf.estimator.train_and_evaluate` + `is_the_final_export` is always False if `TrainSpec.max_steps` is + `None`. Returns: The string path to the exported directory or `None` if export is skipped. @@ -66,7 +74,8 @@ class LatestExporter(Exporter): serving_input_fn, assets_extra=None, as_text=False, - exports_to_keep=5): + exports_to_keep=5, + only_the_final_export=False): """Create an `Exporter` to use with `tf.estimator.EvalSpec`. Args: @@ -86,6 +95,8 @@ class LatestExporter(Exporter): exports_to_keep: Number of exports to keep. Older exports will be garbage-collected. Defaults to 5. Set to `None` to disable garbage collection. 
+ only_the_final_export: Only the final export in the end of training will + happen if this is set to True. Raises: ValueError: if any arguments is invalid. @@ -95,6 +106,8 @@ class LatestExporter(Exporter): self._assets_extra = assets_extra self._as_text = as_text self._exports_to_keep = exports_to_keep + self._only_the_final_export = only_the_final_export + if exports_to_keep is not None and exports_to_keep <= 0: raise ValueError( '`exports_to_keep`, if provided, must be positive number') @@ -103,7 +116,14 @@ class LatestExporter(Exporter): def name(self): return self._name - def export(self, estimator, export_path, checkpoint_path, eval_result): + def export(self, estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): + if not is_the_final_export and self._only_the_final_export: + return None + + if is_the_final_export: + tf_logging.info('Performing the final export in the end of training.') + export_result = estimator.export_savedmodel( export_path, self._serving_input_fn, diff --git a/tensorflow/python/estimator/exporter_test.py b/tensorflow/python/estimator/exporter_test.py index 2ceff1bfd6..01582ac595 100644 --- a/tensorflow/python/estimator/exporter_test.py +++ b/tensorflow/python/estimator/exporter_test.py @@ -42,7 +42,7 @@ class LatestExporterTest(test.TestCase): serving_input_fn=_serving_input_fn, exports_to_keep=0) - def test_saved_model_exporter(self): + def test_latest_exporter(self): def _serving_input_fn(): pass @@ -60,7 +60,42 @@ class LatestExporterTest(test.TestCase): estimator.export_savedmodel.return_value = "export_result_path" export_result = exporter.export(estimator, export_dir_base, - "checkpoint_path", {}) + "checkpoint_path", {}, False) + + self.assertEqual("export_result_path", export_result) + estimator.export_savedmodel.assert_called_with( + export_dir_base, + _serving_input_fn, + assets_extra={"from/path": "to/path"}, + as_text=False, + checkpoint_path="checkpoint_path") + + def 
test_only_the_last_export_is_saved(self): + + def _serving_input_fn(): + pass + + export_dir_base = tempfile.mkdtemp() + "export/" + gfile.MkDir(export_dir_base) + + exporter = exporter_lib.LatestExporter( + name="latest_exporter", + serving_input_fn=_serving_input_fn, + assets_extra={"from/path": "to/path"}, + as_text=False, + exports_to_keep=5, + only_the_final_export=True) + estimator = test.mock.Mock(spec=estimator_lib.Estimator) + estimator.export_savedmodel.return_value = "export_result_path" + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {}, False) + + self.assertFalse(estimator.export_savedmodel.called) + self.assertEqual(None, export_result) + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {}, True) self.assertEqual("export_result_path", export_result) estimator.export_savedmodel.assert_called_with( @@ -93,7 +128,7 @@ class LatestExporterTest(test.TestCase): estimator = test.mock.Mock(spec=estimator_lib.Estimator) # Garbage collect all but the most recent 2 exports, # where recency is determined based on the timestamp directory names. 
- exporter.export(estimator, export_dir_base, None, None) + exporter.export(estimator, export_dir_base, None, None, False) self.assertFalse(gfile.Exists(export_dir_1)) self.assertFalse(gfile.Exists(export_dir_2)) diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 1bed19760b..0a558a67b9 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -519,8 +519,11 @@ class _TrainingExecutor(object): class NewCheckpointListener( basic_session_run_hooks.CheckpointSaverListener): - def __init__(self, estimator, eval_spec): - self._evaluator = _TrainingExecutor._Evaluator(estimator, eval_spec) # pylint: disable=protected-access + def __init__(self, estimator, eval_spec, max_training_steps): + # pylint: disable=protected-access + self._evaluator = _TrainingExecutor._Evaluator(estimator, eval_spec, + max_training_steps) + # pylint: enable=protected-access def after_save(self, session, global_step_value): del session, global_step_value @@ -528,8 +531,10 @@ class _TrainingExecutor(object): # When the underlying `Estimator` object saves a new checkpoint, we would # like this callback to be called so that evaluation and export can trigger. 
- saving_listeners = [NewCheckpointListener(self._estimator, self._eval_spec)] - + saving_listeners = [ + NewCheckpointListener(self._estimator, self._eval_spec, + self._train_spec.max_steps) + ] return self._start_distributed_training(saving_listeners=saving_listeners) def run_evaluator(self): @@ -566,7 +571,8 @@ class _TrainingExecutor(object): 'after {} secs (eval_spec.throttle_secs) or training is ' 'finished.'.format(self._eval_spec.throttle_secs)) - evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec) + evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec, + self._train_spec.max_steps) while True: self._estimator.train( @@ -636,7 +642,8 @@ class _TrainingExecutor(object): time.sleep(start_delay_secs) latest_eval_result = None - evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec) + evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec, + self._train_spec.max_steps) while True: if latest_eval_result: @@ -663,11 +670,12 @@ class _TrainingExecutor(object): class _Evaluator(object): """A helper class to call `Estimator.evaluate` and export model.""" - def __init__(self, estimator, eval_spec): + def __init__(self, estimator, eval_spec, max_training_steps): self._estimator = estimator self._eval_spec = eval_spec self._previous_ckpt_path = None self._last_warning_time = 0 + self._max_training_steps = max_training_steps def evaluate_and_export(self): """Evaluate and (maybe) export the current model. @@ -712,7 +720,14 @@ class _TrainingExecutor(object): 'Internal error: `Estimator.evaluate` result should have ' '`global_step` in result. Given {}'.format(eval_result)) - self._export_eval_result(eval_result, latest_ckpt_path) + # TODO(isaprykin): There is a potential race condition here in the + # distributed setting. The worker job that performs training + # might stop at a later global step value than the evaluator job.
+ is_the_final_export = (eval_result[ops.GraphKeys.GLOBAL_STEP] >= + self._max_training_steps + if self._max_training_steps else False) + self._export_eval_result(eval_result, latest_ckpt_path, + is_the_final_export) self._last_warning_time = 0 self._previous_ckpt_path = latest_ckpt_path @@ -725,7 +740,8 @@ class _TrainingExecutor(object): logging.warning(message) self._last_warning_time = current_time - def _export_eval_result(self, eval_result, checkpoint_path): + def _export_eval_result(self, eval_result, checkpoint_path, + is_the_final_export): """Export `eval_result` according to exporters in `EvalSpec`.""" export_dir_base = os.path.join( compat.as_str_any(self._estimator.model_dir), @@ -738,4 +754,5 @@ class _TrainingExecutor(object): compat.as_str_any(export_dir_base), compat.as_str_any(exporter.name)), checkpoint_path=checkpoint_path, - eval_result=eval_result) + eval_result=eval_result, + is_the_final_export=is_the_final_export) diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py index e4c400ca7f..08d11d7d25 100644 --- a/tensorflow/python/estimator/training_test.py +++ b/tensorflow/python/estimator/training_test.py @@ -802,6 +802,46 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase): self.assertEqual(2, mock_est.evaluate.call_count) self.assertEqual(2, exporter.export.call_count) + def test_final_export_is_true_in_the_end(self): + training_max_step = 200 + + mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_est.model_dir = compat.as_bytes(test.get_temp_dir()) + mock_est.evaluate.side_effect = [ + {_GLOBAL_STEP_KEY: training_max_step // 2}, + {_GLOBAL_STEP_KEY: training_max_step} + ] + mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2'] + + mock_train_spec = test.mock.Mock(spec=training.TrainSpec) + mock_train_spec.max_steps = training_max_step + + mock_est.times_export_fn_was_called = 0 + mock_est.times_the_final_export_was_true = 0 + def export(estimator, export_path, 
checkpoint_path, eval_result, + is_the_final_export): + del export_path, checkpoint_path, eval_result + estimator.times_export_fn_was_called += 1 + if is_the_final_export: + estimator.times_the_final_export_was_true += 1 + + exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) + exporter.name = 'see_how_many_times_export_is_called' + exporter.export = export + + eval_spec = training.EvalSpec( + input_fn=lambda: 1, + start_delay_secs=0, + throttle_secs=0, + exporters=exporter) + + executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec) + executor.run_evaluator() + + self.assertEqual(2, mock_est.evaluate.call_count) + self.assertEqual(2, mock_est.times_export_fn_was_called) + self.assertEqual(1, mock_est.times_the_final_export_was_true) + def test_skip_evaluation_due_to_ckpt(self): training_max_step = 200 mock_est = test.mock.Mock(spec=estimator_lib.Estimator) @@ -1134,6 +1174,47 @@ class TrainingExecutorRunLocalTest(test.TestCase): with self.assertRaisesRegexp(RuntimeError, _STALE_CHECKPOINT_MSG): executor.run_local() + def test_final_export_is_true_in_the_end(self): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') + mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn + + mock_est.times_export_fn_was_called = 0 + mock_est.times_the_final_export_was_true = 0 + def export(estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): + del export_path, checkpoint_path, eval_result + estimator.times_export_fn_was_called += 1 + if is_the_final_export: + estimator.times_the_final_export_was_true += 1 + + exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) + exporter.name = 'see_how_many_times_export_is_called' + exporter.export = export + + train_spec = training.TrainSpec( + input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()]) + eval_spec = training.EvalSpec( + input_fn=lambda: 1, + hooks=[_FakeHook()], + throttle_secs=100, + exporters=exporter) + # should be called 3 
times. + mock_est.evaluate.side_effect = [{ + _GLOBAL_STEP_KEY: train_spec.max_steps - 100 + }, { + _GLOBAL_STEP_KEY: train_spec.max_steps - 50 + }, { + _GLOBAL_STEP_KEY: train_spec.max_steps + }] + + executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) + executor.run_local() + + self.assertEqual(3, mock_est.train.call_count) + self.assertEqual(3, mock_est.evaluate.call_count) + self.assertEqual(3, mock_est.times_export_fn_was_called) + self.assertEqual(1, mock_est.times_the_final_export_was_true) + def test_train_and_evaluate_args(self): mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') mock_est.latest_checkpoint.return_value = 'checkpoint_path/' -- GitLab From 89aaac4bc3ab5a6c65dfa143e42a8fad02e0223f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 15:14:06 -0700 Subject: [PATCH 020/909] Allow Layer.add_update() in Eager mode. PiperOrigin-RevId: 171070861 --- tensorflow/python/layers/base.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 1e11d1ae8d..4cf566bc8b 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -225,18 +225,17 @@ class Layer(object): The `get_updates_for` method allows to retrieve the updates relevant to a specific set of inputs. + This call is ignored in Eager mode. + Arguments: updates: Update op, or list/tuple of update ops. inputs: Optional input tensor(s) that the update(s) depend on. Must match the `inputs` argument passed to the `__call__` method at the time the updates are created. If `None` is passed, the updates are assumed to be unconditional, and will apply across all dataflows of the layer. - - Raises: - RuntimeError: If called in Eager mode. """ if context.in_eager_mode(): - raise RuntimeError('Layer.add_update not supported in Eager mode.') + return # Updates already applied when in eager mode. 
updates = _to_list(updates) if not updates: return -- GitLab From a02116882de2cfee41afac8e5b85df3cee565aee Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 4 Oct 2017 15:44:14 -0700 Subject: [PATCH 021/909] [XLA:CPU] Put the HLO name in IR values that hold the HLO's value. PiperOrigin-RevId: 171075449 --- tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 2a952328a7..1e81a815d8 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -2833,6 +2833,15 @@ Status IrEmitter::Preprocess(HloInstruction* hlo) { } Status IrEmitter::Postprocess(HloInstruction* hlo) { + // Set the name of the emitted llvm::Value to IrName(hlo). Outfeed and send are + // the only ops that don't emit a value. + if (hlo->opcode() != HloOpcode::kOutfeed && + hlo->opcode() != HloOpcode::kSend) { + auto it = emitted_value_.find(hlo); + CHECK(it != emitted_value_.end()); + it->second->setName(AsStringRef(IrName(hlo))); + } + if (auto* prof_counter = GetProfileCounterFor(hlo)) { profiling_state_.RecordCycleDelta(&ir_builder_, hlo, prof_counter); } -- GitLab From ee0fdc296ca00a3dde3def7dbe18252fa9c736dc Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 4 Oct 2017 15:44:34 -0700 Subject: [PATCH 022/909] Add noasan tag to estimator_test PiperOrigin-RevId: 171075499 --- tensorflow/python/keras/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index f29d40f729..f1266cdf9e 100644 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -667,7 +667,10 @@ py_test( size = "medium", srcs = ["_impl/keras/estimator_test.py"], srcs_version = "PY2AND3", - tags = ["notsan"], + tags = [ + "noasan", + "notsan", + ], deps = [ ":keras", "//tensorflow/python:client_testlib", --
GitLab From eba759f74e98342bec09d6d7ddaf9ca638ec6056 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 4 Oct 2017 15:44:48 -0700 Subject: [PATCH 023/909] Switch some contextlib.contextmanagers to regular objects Converts just the frequently-accessed scopes in eager mode. @contextlib.contextmanagers create a few extra Python objects via generators and a wrapper class. PiperOrigin-RevId: 171075529 --- tensorflow/python/framework/errors_impl.py | 38 +- tensorflow/python/framework/ops.py | 113 ++-- tensorflow/python/layers/base.py | 16 +- tensorflow/python/ops/variable_scope.py | 561 ++++++++++-------- .../tools/api/golden/tensorflow.errors.pbtxt | 8 +- ...ors.raise_exception_on_not_ok_status.pbtxt | 8 + .../tensorflow.keras.backend.name_scope.pbtxt | 9 + .../api/golden/tensorflow.keras.backend.pbtxt | 8 +- .../api/golden/tensorflow.name_scope.pbtxt | 9 + tensorflow/tools/api/golden/tensorflow.pbtxt | 16 +- .../golden/tensorflow.variable_scope.pbtxt | 9 + 11 files changed, 463 insertions(+), 332 deletions(-) create mode 100644 tensorflow/tools/api/golden/tensorflow.errors.raise_exception_on_not_ok_status.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.backend.name_scope.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.name_scope.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py index fa956c3d29..c3b2c498c3 100644 --- a/tensorflow/python/framework/errors_impl.py +++ b/tensorflow/python/framework/errors_impl.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import contextlib import traceback import warnings @@ -455,17 +454,26 @@ def _make_specific_exception(node_def, op, message, error_code): return UnknownError(node_def, op, message, error_code) -@contextlib.contextmanager -def raise_exception_on_not_ok_status(): - 
status = c_api_util.ScopedTFStatus() - yield status.status - try: - if c_api.TF_GetCode(status.status) != 0: - raise _make_specific_exception( - None, None, - compat.as_text(c_api.TF_Message(status.status)), - c_api.TF_GetCode(status.status)) - # Delete the underlying status object from memory otherwise it stays alive - # as there is a reference to status from this from the traceback due to raise. - finally: - del status +# Named like a function for backwards compatibility with the +# @tf_contextlib.contextmanager version, which was switched to a class to avoid +# some object creation overhead. +class raise_exception_on_not_ok_status(object): # pylint: disable=invalid-name + """Context manager to check for C API status.""" + + def __enter__(self): + self.status = c_api_util.ScopedTFStatus() + return self.status.status + + def __exit__(self, type_arg, value_arg, traceback_arg): + try: + if c_api.TF_GetCode(self.status.status) != 0: + raise _make_specific_exception( + None, None, + compat.as_text(c_api.TF_Message(self.status.status)), + c_api.TF_GetCode(self.status.status)) + # Delete the underlying status object from memory otherwise it stays alive + # as there is a reference to status from this from the traceback due to + # raise. 
+ finally: + del self.status + return False # False values do not suppress exceptions diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index d1744f451e..50aa070985 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -73,9 +73,13 @@ def tensor_id(tensor): return tensor._id # pylint: disable=protected-access -@tf_contextlib.contextmanager -def _null_contextmanager(): - yield +class _NullContextmanager(object): + + def __enter__(self): + pass + + def __exit__(self, type_arg, value_arg, traceback_arg): + return False # False values do not suppress exceptions def _override_helper(clazz_object, operator, func): @@ -4263,7 +4267,7 @@ def colocate_with(op, ignore_existing=False): if op is not None: return device(op.device) else: - return _null_contextmanager() + return _NullContextmanager() def control_dependencies(control_inputs): @@ -4285,7 +4289,7 @@ def control_dependencies(control_inputs): if context.in_graph_mode(): return get_default_graph().control_dependencies(control_inputs) else: - return _null_contextmanager() + return _NullContextmanager() class _DefaultStack(threading.local): @@ -4839,10 +4843,11 @@ def get_all_collection_keys(): return get_default_graph().get_all_collection_keys() -# pylint: disable=g-doc-return-or-yield -@tf_contextlib.contextmanager -def name_scope(name, default_name=None, values=None): - """Returns a context manager for use when defining a Python op. +# Named like a function for backwards compatibility with the +# @tf_contextlib.contextmanager version, which was switched to a class to avoid +# some object creation overhead. +class name_scope(object): # pylint: disable=invalid-name + """A context manager for use when defining a Python op. 
This context manager validates that the given `values` are from the same graph, makes that graph the default graph, and pushes a @@ -4861,48 +4866,64 @@ def name_scope(name, default_name=None, values=None): # Define some computation that uses `a`, `b`, and `c`. return foo_op(..., name=scope) ``` + """ - Args: - name: The name argument that is passed to the op function. - default_name: The default name to use if the `name` argument is `None`. - values: The list of `Tensor` arguments that are passed to the op function. + def __init__(self, name, default_name=None, values=None): + """Initialize the context manager. - Returns: - A context manager for use in defining Python ops. Yields the name scope. + Args: + name: The name argument that is passed to the op function. + default_name: The default name to use if the `name` argument is `None`. + values: The list of `Tensor` arguments that are passed to the op function. + """ + self._name = default_name if name is None else name + self._default_name = default_name + self._values = values + self._ctx = context.context() + self._in_eager_mode = self._ctx.in_eager_mode() - Raises: - ValueError: if neither `name` nor `default_name` is provided - but `values` are. - """ - name = default_name if name is None else name - ctx = context.context() - if ctx.in_eager_mode(): - old_name = ctx.scope_name - if name: - scope_name = "%s%s/" % (old_name, name) if old_name else "%s/" % name - else: - scope_name = "" - ctx.scope_name = scope_name - try: - yield scope_name - finally: - ctx.scope_name = old_name - else: - if name is None and values is not None: - # We only raise an error if values is not None (provided) because - # currently tf.name_scope(None) (values=None then) is sometimes used as an - # idiom to reset to top scope. - raise ValueError( - "At least one of name (%s) and default_name (%s) must be provided." 
% - (name, default_name)) - if values is None: - values = [] - g = _get_graph_from_inputs(values) - with g.as_default(), g.name_scope(name) as scope: - yield scope + def __enter__(self): + """Start the scope block. + Returns: + The scope name. -# pylint: enable=g-doc-return-or-yield + Raises: + ValueError: if neither `name` nor `default_name` is provided + but `values` are. + """ + if self._in_eager_mode: + self._old_name = self._ctx.scope_name + if self._name: + scope_name = (self._old_name + self._name + "/" + if self._old_name else self._name + "/") + else: + scope_name = "" + self._ctx.scope_name = scope_name + return scope_name + else: + if self._name is None and self._values is not None: + # We only raise an error if values is not None (provided) because + # currently tf.name_scope(None) (values=None then) is sometimes used as + # an idiom to reset to top scope. + raise ValueError( + "At least one of name (%s) and default_name (%s) must be provided." + % (self._name, self._default_name)) + if self._values is None: + self._values = [] + g = _get_graph_from_inputs(self._values) + self._g_manager = g.as_default() + self._g_manager.__enter__() + self._name_scope = g.name_scope(self._name) + return self._name_scope.__enter__() + + def __exit__(self, type_arg, value_arg, traceback_arg): + if self._in_eager_mode: + self._ctx.scope_name = self._old_name + else: + self._name_scope.__exit__(type_arg, value_arg, traceback_arg) + self._g_manager.__exit__(type_arg, value_arg, traceback_arg) + return False # False values do not suppress exceptions def strip_name_scope(name, export_scope): diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 4cf566bc8b..711ffdfa9c 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -136,7 +136,8 @@ class Layer(object): # Determine variable scope. 
scope = kwargs.get('_scope') if scope: - self._scope = next(vs.variable_scope(scope).gen) + with vs.variable_scope(scope) as captured_scope: + self._scope = captured_scope else: self._scope = None @@ -402,11 +403,13 @@ class Layer(object): if self._scope is None: # If constructed with _scope=None, lazy setting of scope. if self._reuse: - self._scope = next(vs.variable_scope( - scope if scope is not None else self._base_name).gen) + with vs.variable_scope( + scope if scope is not None else self._base_name) as captured_scope: + self._scope = captured_scope else: - self._scope = next(vs.variable_scope( - scope, default_name=self._base_name).gen) + with vs.variable_scope( + scope, default_name=self._base_name) as captured_scope: + self._scope = captured_scope def add_variable(self, name, shape, dtype=None, initializer=None, regularizer=None, @@ -1440,7 +1443,8 @@ class Network(Layer): base_name = _to_snake_case(self.__class__.__name__) self._name = _unique_layer_name(base_name) self._activity_regularizer = None - self._scope = next(vs.variable_scope(None, default_name=base_name).gen) + with vs.variable_scope(None, default_name=base_name) as captured_scope: + self._scope = captured_scope self._base_name = base_name call_fn_args = estimator_util.fn_args(self.call) self._compute_previous_mask = ('mask' in call_fn_args or diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 33790c5d0a..d0ebfdb85e 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1406,139 +1406,162 @@ def _get_partitioned_variable(name, # pylint: enable=protected-access -@tf_contextlib.contextmanager -def _pure_variable_scope(name_or_scope, - reuse=None, - initializer=None, - regularizer=None, - caching_device=None, - partitioner=None, - custom_getter=None, - old_name_scope=None, - dtype=dtypes.float32, - use_resource=None, - constraint=None): - """Creates a context for the variable_scope, see 
`variable_scope` for docs. - - Note: this does not create a name scope. +# Named like a function for compatibility with the previous +# @tf_contextlib.contextmanager definition. +class _pure_variable_scope(object): # pylint: disable=invalid-name + """A context for the variable_scope, see `variable_scope` for docs.""" - Args: - name_or_scope: `string` or `VariableScope`: the scope to open. - reuse: `True` or None, or tf.AUTO_REUSE; if `None`, we inherit the parent - scope's reuse flag. - initializer: default initializer for variables within this scope. - regularizer: default regularizer for variables within this scope. - caching_device: default caching device for variables within this scope. - partitioner: default partitioner for variables within this scope. - custom_getter: default custom getter for variables within this scope. - old_name_scope: the original name scope when re-entering a variable scope. - dtype: type of the variables within this scope (defaults to `DT_FLOAT`). - use_resource: If False, variables in this scope will be regular Variables. - If True, experimental ResourceVariables will be creates instead, with - well-defined semantics. Defaults to False (will later change to True). - constraint: An optional projection function to be applied to the variable - after being updated by an `Optimizer` (e.g. used to implement norm - constraints or value constraints for layer weights). The function must - take as input the unprojected Tensor representing the value of the - variable and return the Tensor for the projected value - (which must have the same shape). Constraints are not safe to - use when doing asynchronous distributed training. + def __init__(self, + name_or_scope, + reuse=None, + initializer=None, + regularizer=None, + caching_device=None, + partitioner=None, + custom_getter=None, + old_name_scope=None, + dtype=dtypes.float32, + use_resource=None, + constraint=None): + """Creates a context for the variable_scope, see `variable_scope` for docs. 
- Yields: - A scope that can be captured and reused. + Note: this does not create a name scope. - Raises: - ValueError: when trying to reuse within a create scope, or create within - a reuse scope, or if reuse is not `None` or `True`. - TypeError: when the types of some arguments are not appropriate. + Args: + name_or_scope: `string` or `VariableScope`: the scope to open. + reuse: `True` or None, or tf.AUTO_REUSE; if `None`, we inherit the parent + scope's reuse flag. + initializer: default initializer for variables within this scope. + regularizer: default regularizer for variables within this scope. + caching_device: default caching device for variables within this scope. + partitioner: default partitioner for variables within this scope. + custom_getter: default custom getter for variables within this scope. + old_name_scope: the original name scope when re-entering a variable scope. + dtype: type of the variables within this scope (defaults to `DT_FLOAT`). + use_resource: If False, variables in this scope will be regular Variables. + If True, experimental ResourceVariables will be creates instead, with + well-defined semantics. Defaults to False (will later change to True). + constraint: An optional projection function to be applied to the variable + after being updated by an `Optimizer` (e.g. used to implement norm + constraints or value constraints for layer weights). The function must + take as input the unprojected Tensor representing the value of the + variable and return the Tensor for the projected value + (which must have the same shape). Constraints are not safe to + use when doing asynchronous distributed training. 
+ """ + self._name_or_scope = name_or_scope + self._reuse = reuse + self._initializer = initializer + self._regularizer = regularizer + self._caching_device = caching_device + self._partitioner = partitioner + self._custom_getter = custom_getter + self._old_name_scope = old_name_scope + self._dtype = dtype + self._use_resource = use_resource + self._constraint = constraint - """ - get_variable_scope() # Ensure that a default exists, then get a pointer. - # Get the reference to the collection as we want to modify it in place. - default_varscope = ops.get_collection_ref(_VARSCOPE_KEY) - old = default_varscope[0] - var_store = _get_default_variable_store() - if isinstance(name_or_scope, VariableScope): - new_name = name_or_scope.name - else: - new_name = old.name + "/" + name_or_scope if old.name else name_or_scope - try: - var_store.open_variable_scope(new_name) - if isinstance(name_or_scope, VariableScope): - old_subscopes = copy.copy(var_store.variable_scopes_count) - name_scope = name_or_scope._name_scope # pylint: disable=protected-access - # Handler for the case when we jump to a shared scope. - # We create a new VariableScope (default_varscope[0]) that contains - # a copy of the provided shared scope, possibly with changed reuse - # and initializer, if the user requested this. - default_varscope[0] = VariableScope( - name_or_scope.reuse if not reuse else reuse, - name=new_name, - initializer=name_or_scope.initializer, - regularizer=name_or_scope.regularizer, - caching_device=name_or_scope.caching_device, - partitioner=name_or_scope.partitioner, - dtype=name_or_scope.dtype, - custom_getter=name_or_scope.custom_getter, + def __enter__(self): + """Begins the scope block. + + Returns: + A VariableScope. + Raises: + ValueError: when trying to reuse within a create scope, or create within + a reuse scope, or if reuse is not `None` or `True`. + TypeError: when the types of some arguments are not appropriate. 
+ """ + get_variable_scope() # Ensure that a default exists, then get a pointer. + # Get the reference to the collection as we want to modify it in place. + self._default_varscope = ops.get_collection_ref(_VARSCOPE_KEY) + self._old = self._default_varscope[0] + self._var_store = _get_default_variable_store() + if isinstance(self._name_or_scope, VariableScope): + self._new_name = self._name_or_scope.name + else: + self._new_name = ( + self._old.name + "/" + self._name_or_scope if self._old.name + else self._name_or_scope) + self._var_store.open_variable_scope(self._new_name) + if isinstance(self._name_or_scope, VariableScope): + self._old_subscopes = copy.copy(self._var_store.variable_scopes_count) + name_scope = self._name_or_scope._name_scope # pylint: disable=protected-access + # Handler for the case when we jump to a shared scope. We create a new + # VariableScope (self._default_varscope[0]) that contains a copy of the + # provided shared scope, possibly with changed reuse and initializer, if + # the user requested this. 
+ self._default_varscope[0] = VariableScope( + self._name_or_scope.reuse if not self._reuse else self._reuse, + name=self._new_name, + initializer=self._name_or_scope.initializer, + regularizer=self._name_or_scope.regularizer, + caching_device=self._name_or_scope.caching_device, + partitioner=self._name_or_scope.partitioner, + dtype=self._name_or_scope.dtype, + custom_getter=self._name_or_scope.custom_getter, name_scope=name_scope, - use_resource=name_or_scope.use_resource, - constraint=constraint) - if initializer is not None: - default_varscope[0].set_initializer(initializer) - if regularizer is not None: - default_varscope[0].set_regularizer(regularizer) - if caching_device is not None: - default_varscope[0].set_caching_device(caching_device) - if partitioner is not None: - default_varscope[0].set_partitioner(partitioner) - if custom_getter is not None: - default_varscope[0].set_custom_getter( + use_resource=self._name_or_scope.use_resource, + constraint=self._constraint) + if self._initializer is not None: + self._default_varscope[0].set_initializer(self._initializer) + if self._regularizer is not None: + self._default_varscope[0].set_regularizer(self._regularizer) + if self._caching_device is not None: + self._default_varscope[0].set_caching_device(self._caching_device) + if self._partitioner is not None: + self._default_varscope[0].set_partitioner(self._partitioner) + if self._custom_getter is not None: + self._default_varscope[0].set_custom_getter( _maybe_wrap_custom_getter( - custom_getter, name_or_scope.custom_getter)) - if dtype is not None: - default_varscope[0].set_dtype(dtype) - if use_resource is not None: - default_varscope[0].set_use_resource(use_resource) - yield default_varscope[0] + self._custom_getter, self._name_or_scope.custom_getter)) + if self._dtype is not None: + self._default_varscope[0].set_dtype(self._dtype) + if self._use_resource is not None: + self._default_varscope[0].set_use_resource(self._use_resource) + return 
self._default_varscope[0] else: # Handler for the case when we just prolong current variable scope. # VariableScope with name extended by the provided one, and inherited # reuse and initializer (except if the user provided values to set). - reuse = reuse or old.reuse # Re-using is inherited by sub-scopes. - default_varscope[0] = VariableScope( - reuse, - name=new_name, - initializer=old.initializer, - regularizer=old.regularizer, - caching_device=old.caching_device, - partitioner=old.partitioner, - dtype=old.dtype, - use_resource=old.use_resource, - custom_getter=old.custom_getter, - name_scope=old_name_scope or name_or_scope, - constraint=constraint) - if initializer is not None: - default_varscope[0].set_initializer(initializer) - if regularizer is not None: - default_varscope[0].set_regularizer(regularizer) - if caching_device is not None: - default_varscope[0].set_caching_device(caching_device) - if partitioner is not None: - default_varscope[0].set_partitioner(partitioner) - if custom_getter is not None: - default_varscope[0].set_custom_getter( - _maybe_wrap_custom_getter(custom_getter, old.custom_getter)) - if dtype is not None: - default_varscope[0].set_dtype(dtype) - if use_resource is not None: - default_varscope[0].set_use_resource(use_resource) - yield default_varscope[0] - finally: - var_store.close_variable_subscopes(new_name) + self._reuse = (self._reuse + or self._old.reuse) # Re-using is inherited by sub-scopes. 
+ self._default_varscope[0] = VariableScope( + self._reuse, + name=self._new_name, + initializer=self._old.initializer, + regularizer=self._old.regularizer, + caching_device=self._old.caching_device, + partitioner=self._old.partitioner, + dtype=self._old.dtype, + use_resource=self._old.use_resource, + custom_getter=self._old.custom_getter, + name_scope=self._old_name_scope or self._name_or_scope, + constraint=self._constraint) + if self._initializer is not None: + self._default_varscope[0].set_initializer(self._initializer) + if self._regularizer is not None: + self._default_varscope[0].set_regularizer(self._regularizer) + if self._caching_device is not None: + self._default_varscope[0].set_caching_device(self._caching_device) + if self._partitioner is not None: + self._default_varscope[0].set_partitioner(self._partitioner) + if self._custom_getter is not None: + self._default_varscope[0].set_custom_getter( + _maybe_wrap_custom_getter(self._custom_getter, + self._old.custom_getter)) + if self._dtype is not None: + self._default_varscope[0].set_dtype(self._dtype) + if self._use_resource is not None: + self._default_varscope[0].set_use_resource(self._use_resource) + return self._default_varscope[0] + + def __exit__(self, type_arg, value_arg, traceback_arg): # If jumping out from a non-prolonged scope, restore counts. 
- if isinstance(name_or_scope, VariableScope): - var_store.variable_scopes_count = old_subscopes - default_varscope[0] = old + if isinstance(self._name_or_scope, VariableScope): + self._var_store.variable_scopes_count = self._old_subscopes + else: + self._var_store.close_variable_subscopes(self._new_name) + self._default_varscope[0] = self._old def _maybe_wrap_custom_getter(custom_getter, old_getter): @@ -1574,25 +1597,15 @@ def _get_unique_variable_scope(prefix): return prefix + ("_%d" % idx) -# pylint: disable=g-doc-return-or-yield -@tf_contextlib.contextmanager -def variable_scope(name_or_scope, - default_name=None, - values=None, - initializer=None, - regularizer=None, - caching_device=None, - partitioner=None, - custom_getter=None, - reuse=None, - dtype=None, - use_resource=None, - constraint=None): - """Returns a context manager for defining ops that creates variables (layers). +# Named like a function for backwards compatibility with the +# @tf_contextlib.contextmanager version, which was switched to a class to avoid +# some object creation overhead. +class variable_scope(object): # pylint: disable=invalid-name + """A context manager for defining ops that creates variables (layers). - This context manager validates that the (optional) `values` are from - the same graph, ensures that graph is the default graph, and pushes a - name scope and a variable scope. + This context manager validates that the (optional) `values` are from the same + graph, ensures that graph is the default graph, and pushes a name scope and a + variable scope. If `name_or_scope` is not None, it is used as is. If `scope` is None, then `default_name` is used. In that case, if the same name has been previously @@ -1600,8 +1613,8 @@ def variable_scope(name_or_scope, Variable scope allows you to create new variables and to share already created ones while providing checks to not create or share by accident. 
For details, - see the @{$variables$Variable Scope How To}, - here we present only a few basic examples. + see the @{$variables$Variable Scope How To}, here we present only a few basic + examples. Simple example of how to create a new variable: @@ -1645,8 +1658,8 @@ def variable_scope(name_or_scope, assert v1 == v ``` - To prevent accidental sharing of variables, we raise an exception when - getting an existing variable in a non-reusing scope. + To prevent accidental sharing of variables, we raise an exception when getting + an existing variable in a non-reusing scope. ```python with tf.variable_scope("foo"): @@ -1655,8 +1668,8 @@ def variable_scope(name_or_scope, # Raises ValueError("... v already exists ..."). ``` - Similarly, we raise an exception when trying to get a variable that - does not exist in reuse mode. + Similarly, we raise an exception when trying to get a variable that does not + exist in reuse mode. ```python with tf.variable_scope("foo", reuse=True): @@ -1664,123 +1677,173 @@ def variable_scope(name_or_scope, # Raises ValueError("... v does not exists ..."). ``` - Note that the `reuse` flag is inherited: if we open a reusing scope, - then all its sub-scopes become reusing as well. + Note that the `reuse` flag is inherited: if we open a reusing scope, then all + its sub-scopes become reusing as well. A note about name scoping: Setting `reuse` does not impact the naming of other - ops such as mult. See related discussion on [github#6189](https://github.com/tensorflow/tensorflow/issues/6189) + ops such as mult. See related discussion on + [github#6189](https://github.com/tensorflow/tensorflow/issues/6189) - Note that up to and including version 1.0, it was allowed (though - explicitly discouraged) to pass False to the reuse argument, yielding - undocumented behaviour slightly different from None. Starting at 1.1.0 - passing None and False as reuse has exactly the same effect. 
+ Note that up to and including version 1.0, it was allowed (though explicitly + discouraged) to pass False to the reuse argument, yielding undocumented + behaviour slightly different from None. Starting at 1.1.0 passing None and + False as reuse has exactly the same effect. + """ - Args: - name_or_scope: `string` or `VariableScope`: the scope to open. - default_name: The default name to use if the `name_or_scope` argument is - `None`, this name will be uniquified. If name_or_scope is provided it - won't be used and therefore it is not required and can be None. - values: The list of `Tensor` arguments that are passed to the op function. - initializer: default initializer for variables within this scope. - regularizer: default regularizer for variables within this scope. - caching_device: default caching device for variables within this scope. - partitioner: default partitioner for variables within this scope. - custom_getter: default custom getter for variables within this scope. - reuse: `True`, None, or tf.AUTO_REUSE; if `True`, we go into reuse mode - for this scope as well as all sub-scopes; if tf.AUTO_REUSE, we create - variables if they do not exist, and return them otherwise; if None, we - inherit the parent scope's reuse flag. In Eager mode, this argument is - always forced to be tf.AUTO_REUSE. - dtype: type of variables created in this scope (defaults to the type - in the passed scope, or inherited from parent scope). - use_resource: If False, all variables will be regular Variables. If True, - experimental ResourceVariables with well-defined semantics will be used - instead. Defaults to False (will later change to True). In Eager mode, - this argument is always forced to be True. - constraint: An optional projection function to be applied to the variable - after being updated by an `Optimizer` (e.g. used to implement norm - constraints or value constraints for layer weights). 
The function must - take as input the unprojected Tensor representing the value of the - variable and return the Tensor for the projected value - (which must have the same shape). Constraints are not safe to - use when doing asynchronous distributed training. + def __init__(self, + name_or_scope, + default_name=None, + values=None, + initializer=None, + regularizer=None, + caching_device=None, + partitioner=None, + custom_getter=None, + reuse=None, + dtype=None, + use_resource=None, + constraint=None): + """Initialize the context manager. - Returns: - A scope that can be captured and reused. + Args: + name_or_scope: `string` or `VariableScope`: the scope to open. + default_name: The default name to use if the `name_or_scope` argument is + `None`, this name will be uniquified. If name_or_scope is provided it + won't be used and therefore it is not required and can be None. + values: The list of `Tensor` arguments that are passed to the op function. + initializer: default initializer for variables within this scope. + regularizer: default regularizer for variables within this scope. + caching_device: default caching device for variables within this scope. + partitioner: default partitioner for variables within this scope. + custom_getter: default custom getter for variables within this scope. + reuse: `True`, None, or tf.AUTO_REUSE; if `True`, we go into reuse mode + for this scope as well as all sub-scopes; if tf.AUTO_REUSE, we create + variables if they do not exist, and return them otherwise; if None, we + inherit the parent scope's reuse flag. In Eager mode, this argument is + always forced to be tf.AUTO_REUSE. + dtype: type of variables created in this scope (defaults to the type + in the passed scope, or inherited from parent scope). + use_resource: If False, all variables will be regular Variables. If True, + experimental ResourceVariables with well-defined semantics will be used + instead. Defaults to False (will later change to True). 
In Eager mode, + this argument is always forced to be True. + constraint: An optional projection function to be applied to the variable + after being updated by an `Optimizer` (e.g. used to implement norm + constraints or value constraints for layer weights). The function must + take as input the unprojected Tensor representing the value of the + variable and return the Tensor for the projected value + (which must have the same shape). Constraints are not safe to + use when doing asynchronous distributed training. - Raises: - ValueError: when trying to reuse within a create scope, or create within - a reuse scope. - TypeError: when the types of some arguments are not appropriate. - """ - if default_name is None and name_or_scope is None: - raise TypeError("If default_name is None then name_or_scope is required") - if reuse is False: # We don't allow non-inheriting scopes, False = None here. - reuse = None - if not (reuse is True or reuse is None or reuse is AUTO_REUSE): - raise ValueError("The reuse parameter must be True or False or None.") - if values is None: - values = [] - g = ops._get_graph_from_inputs(values) # pylint: disable=protected-access - with g.as_default(): - if name_or_scope is not None: - if not isinstance(name_or_scope, (VariableScope,) + six.string_types): + Returns: + A scope that can be captured and reused. + + Raises: + ValueError: when trying to reuse within a create scope, or create within + a reuse scope. + TypeError: when the types of some arguments are not appropriate. 
+ """ + self._name_or_scope = name_or_scope + self._default_name = default_name + self._values = values + self._initializer = initializer + self._regularizer = regularizer + self._caching_device = caching_device + self._partitioner = partitioner + self._custom_getter = custom_getter + self._reuse = reuse + self._dtype = dtype + self._use_resource = use_resource + self._constraint = constraint + if self._default_name is None and self._name_or_scope is None: + raise TypeError("If default_name is None then name_or_scope is required") + if self._reuse is False: + # We don't allow non-inheriting scopes, False = None here. + self._reuse = None + if not (self._reuse is True + or self._reuse is None + or self._reuse is AUTO_REUSE): + raise ValueError("The reuse parameter must be True or False or None.") + if self._values is None: + self._values = [] + self._in_graph_mode = not context.in_eager_mode() + if self._in_graph_mode: + self._graph = ops._get_graph_from_inputs(self._values) # pylint: disable=protected-access + + def __enter__(self): + if self._in_graph_mode: + self._graph_context_manager = self._graph.as_default() + self._graph_context_manager.__enter__() + if self._name_or_scope is not None: + if not isinstance(self._name_or_scope, + (VariableScope,) + six.string_types): raise TypeError("VariableScope: name_or_scope must be a string or " "VariableScope.") - if isinstance(name_or_scope, six.string_types): - name_scope = name_or_scope + if isinstance(self._name_or_scope, six.string_types): + name_scope = self._name_or_scope else: - name_scope = name_or_scope.name.split("/")[-1] + name_scope = self._name_or_scope.name.split("/")[-1] if name_scope: - with ops.name_scope(name_scope) as cur_name_scope: - if isinstance(name_or_scope, six.string_types): - old_name_scope = cur_name_scope - else: - old_name_scope = name_or_scope.original_name_scope - with _pure_variable_scope( - name_or_scope, - reuse=reuse, - initializer=initializer, - regularizer=regularizer, - 
caching_device=caching_device, - partitioner=partitioner, - custom_getter=custom_getter, - old_name_scope=old_name_scope, - dtype=dtype, - use_resource=use_resource, - constraint=constraint) as vs: - yield vs + self._current_name_scope = ops.name_scope(name_scope) + current_name_scope_name = self._current_name_scope.__enter__() + if isinstance(self._name_or_scope, six.string_types): + old_name_scope = current_name_scope_name + else: + old_name_scope = self._name_or_scope.original_name_scope + self._pure_variable_scope = _pure_variable_scope( + self._name_or_scope, + reuse=self._reuse, + initializer=self._initializer, + regularizer=self._regularizer, + caching_device=self._caching_device, + partitioner=self._partitioner, + custom_getter=self._custom_getter, + old_name_scope=old_name_scope, + dtype=self._dtype, + use_resource=self._use_resource, + constraint=self._constraint) + return self._pure_variable_scope.__enter__() else: + self._current_name_scope = None # This can only happen if someone is entering the root variable scope. - with _pure_variable_scope( - name_or_scope, - reuse=reuse, - initializer=initializer, - regularizer=regularizer, - caching_device=caching_device, - partitioner=partitioner, - custom_getter=custom_getter, - dtype=dtype, - use_resource=use_resource, - constraint=constraint) as vs: - yield vs + self._pure_variable_scope = _pure_variable_scope( + self._name_or_scope, + reuse=self._reuse, + initializer=self._initializer, + regularizer=self._regularizer, + caching_device=self._caching_device, + partitioner=self._partitioner, + custom_getter=self._custom_getter, + dtype=self._dtype, + use_resource=self._use_resource, + constraint=self._constraint) + return self._pure_variable_scope.__enter__() + else: # Here name_or_scope is None. Using default name, but made unique. 
- if reuse: + if self._reuse: raise ValueError("reuse=True cannot be used without a name_or_scope") - with ops.name_scope(default_name) as scope: - unique_default_name = _get_unique_variable_scope(default_name) - with _pure_variable_scope( - unique_default_name, - initializer=initializer, - regularizer=regularizer, - caching_device=caching_device, - partitioner=partitioner, - custom_getter=custom_getter, - old_name_scope=scope, - dtype=dtype, - use_resource=use_resource, - constraint=constraint) as vs: - yield vs + self._current_name_scope = ops.name_scope(self._default_name) + current_name_scope_name = self._current_name_scope.__enter__() + unique_default_name = _get_unique_variable_scope(self._default_name) + self._pure_variable_scope = _pure_variable_scope( + unique_default_name, + initializer=self._initializer, + regularizer=self._regularizer, + caching_device=self._caching_device, + partitioner=self._partitioner, + custom_getter=self._custom_getter, + old_name_scope=current_name_scope_name, + dtype=self._dtype, + use_resource=self._use_resource, + constraint=self._constraint) + return self._pure_variable_scope.__enter__() + + def __exit__(self, type_arg, value_arg, traceback_arg): + self._pure_variable_scope.__exit__(type_arg, value_arg, traceback_arg) + if self._current_name_scope: + self._current_name_scope.__exit__(type_arg, value_arg, traceback_arg) + if self._in_graph_mode: + self._graph_context_manager.__exit__(type_arg, value_arg, traceback_arg) # pylint: disable=g-doc-return-or-yield diff --git a/tensorflow/tools/api/golden/tensorflow.errors.pbtxt b/tensorflow/tools/api/golden/tensorflow.errors.pbtxt index 0ad1c19603..c5fe49baab 100644 --- a/tensorflow/tools/api/golden/tensorflow.errors.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.errors.pbtxt @@ -136,6 +136,10 @@ tf_module { name: "UnknownError" mtype: "" } + member { + name: "raise_exception_on_not_ok_status" + mtype: "" + } member_method { name: "error_code_from_exception_type" argspec: 
"args=[\'cls\'], varargs=None, keywords=None, defaults=None" @@ -144,8 +148,4 @@ tf_module { name: "exception_type_from_error_code" argspec: "args=[\'error_code\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "raise_exception_on_not_ok_status" - argspec: "args=[], varargs=args, keywords=kwds, defaults=None" - } } diff --git a/tensorflow/tools/api/golden/tensorflow.errors.raise_exception_on_not_ok_status.pbtxt b/tensorflow/tools/api/golden/tensorflow.errors.raise_exception_on_not_ok_status.pbtxt new file mode 100644 index 0000000000..5d25ec769a --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.errors.raise_exception_on_not_ok_status.pbtxt @@ -0,0 +1,8 @@ +path: "tensorflow.errors.raise_exception_on_not_ok_status" +tf_class { + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.backend.name_scope.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.backend.name_scope.pbtxt new file mode 100644 index 0000000000..43692a6c73 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.backend.name_scope.pbtxt @@ -0,0 +1,9 @@ +path: "tensorflow.keras.backend.name_scope" +tf_class { + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'name\', \'default_name\', \'values\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.backend.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.backend.pbtxt index 6204ffa814..44fbe0f7a0 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.backend.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.backend.pbtxt @@ -1,5 +1,9 @@ path: "tensorflow.keras.backend" tf_module { + member { + name: "name_scope" + mtype: "" + } member_method { name: "abs" argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" @@ -288,10 +292,6 @@ tf_module { name: "moving_average_update" 
argspec: "args=[\'x\', \'value\', \'momentum\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "name_scope" - argspec: "args=[\'name\', \'default_name\', \'values\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " - } member_method { name: "ndim" argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.name_scope.pbtxt b/tensorflow/tools/api/golden/tensorflow.name_scope.pbtxt new file mode 100644 index 0000000000..107f066c29 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.name_scope.pbtxt @@ -0,0 +1,9 @@ +path: "tensorflow.name_scope" +tf_class { + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'name\', \'default_name\', \'values\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 5ecf34d2ed..32a86e420a 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -392,6 +392,10 @@ tf_module { name: "metrics" mtype: "" } + member { + name: "name_scope" + mtype: "" + } member { name: "newaxis" mtype: "" @@ -508,6 +512,10 @@ tf_module { name: "user_ops" mtype: "" } + member { + name: "variable_scope" + mtype: "" + } member { name: "variance_scaling_initializer" mtype: "" @@ -1380,10 +1388,6 @@ tf_module { name: "multiply" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "name_scope" - argspec: "args=[\'name\', \'default_name\', \'values\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " - } member_method { name: "negative" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -2028,10 +2032,6 @@ tf_module { name: "variable_op_scope" argspec: "args=[\'values\', \'name_or_scope\', \'default_name\', 
\'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } - member_method { - name: "variable_scope" - argspec: "args=[\'name_or_scope\', \'default_name\', \'values\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " - } member_method { name: "variables_initializer" argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'init\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt b/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt new file mode 100644 index 0000000000..de1ad7e860 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.variable_scope.pbtxt @@ -0,0 +1,9 @@ +path: "tensorflow.variable_scope" +tf_class { + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'name_or_scope\', \'default_name\', \'values\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + } +} -- GitLab From 32dc203f55a7462ddf780c68d619af574daedd46 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 4 Oct 2017 15:59:02 -0700 Subject: [PATCH 024/909] Improve gradient shape validation errors. 
PiperOrigin-RevId: 171077826 --- tensorflow/python/ops/gradients_impl.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index cb7d409f3b..d9b14de984 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -594,11 +594,19 @@ def gradients(ys, # If no grad_fn is defined or none of out_grads is available, # just propagate a list of None backwards. in_grads = [None] * len(op.inputs) - for t_in, in_grad in zip(op.inputs, in_grads): + for i, (t_in, in_grad) in enumerate(zip(op.inputs, in_grads)): if in_grad is not None: if (isinstance(in_grad, ops.Tensor) and t_in.dtype != dtypes.resource): - in_grad.set_shape(t_in.get_shape()) + try: + in_grad.set_shape(t_in.get_shape()) + except ValueError: + raise ValueError( + "Incompatible shapes between op input and calculated " + "input gradient. Forward operation: %s. Input index: %d. " + "Original input shape: %s. " + "Calculated input gradient shape: %s" + % (op.name, i, t_in.shape, in_grad.shape)) _SetGrad(grads, t_in, in_grad) if loop_state: loop_state.ExitGradWhileContext(op, before=False) -- GitLab From c57a4ace4a9a9a5cf871e6a090a4252f0c9ef2ad Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 4 Oct 2017 16:10:19 -0700 Subject: [PATCH 025/909] Fix error when loading s3 file system library. If attempting to call tf.load_file_system_library on the S3 library you would previously get an error similar to... s3_file_system.so: undefined symbol: _ZN5nsync13nsync_mu_lockEPNS_11nsync_mu_s_E Changing the build rule to be tf_cc_binary instead of cc_binary fixes this issue. 
PiperOrigin-RevId: 171079804 --- tensorflow/contrib/s3/BUILD | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/s3/BUILD b/tensorflow/contrib/s3/BUILD index a4daed01e7..b7bc1a11d6 100644 --- a/tensorflow/contrib/s3/BUILD +++ b/tensorflow/contrib/s3/BUILD @@ -9,6 +9,7 @@ exports_files(["LICENSE"]) load( "//tensorflow:tensorflow.bzl", + "tf_cc_binary", "tf_cc_test", ) @@ -24,7 +25,7 @@ filegroup( visibility = ["//tensorflow:__subpackages__"], ) -cc_binary( +tf_cc_binary( name = "s3_file_system.so", srcs = [ "s3_crypto.cc", -- GitLab From cd12a89b4cbc05b16667695fa483d9c375821b99 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 16:14:38 -0700 Subject: [PATCH 026/909] Add shape inference function for _XlaRecv. PiperOrigin-RevId: 171080445 --- tensorflow/compiler/tf2xla/ops/sendrecv_ops.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2xla/ops/sendrecv_ops.cc b/tensorflow/compiler/tf2xla/ops/sendrecv_ops.cc index b6947bfe57..4b41c16a8b 100644 --- a/tensorflow/compiler/tf2xla/ops/sendrecv_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/sendrecv_ops.cc @@ -37,7 +37,14 @@ REGISTER_OP("_XLARecv") .Attr("tensor_name: string") .Attr("shape: shape") .SetIsStateful() - .SetShapeFn(shape_inference::UnknownShape) + .SetShapeFn([](shape_inference::InferenceContext* c) { + TensorShape shape_attr; + TF_RETURN_IF_ERROR(c->GetAttr("shape", &shape_attr)); + shape_inference::ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromTensorShape(shape_attr, &s)); + c->set_output(0, s); + return Status::OK(); + }) .Doc(R"doc( Receives the named tensor from another XLA computation. 
-- GitLab From cfad8bfa77a8adfa093599c277b459708f0a95ff Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 4 Oct 2017 16:50:02 -0700 Subject: [PATCH 027/909] Don't use dlsym to resolve symbols in the CPU JIT Instead of resolving symbols via dlsym when JITting for the CPU backend, use a registry based mechanism. This lets us kill off the --export_dynamic hack that we used to need for CustomCall on the CPU backend. PiperOrigin-RevId: 171084886 --- tensorflow/compiler/tf2xla/kernels/BUILD | 5 +- .../kernels/gather_op_kernel_float_int32.cc | 3 + .../kernels/gather_op_kernel_float_int64.cc | 3 + .../index_ops_kernel_argmax_float_1d.cc | 3 + .../index_ops_kernel_argmax_float_2d.cc | 3 + tensorflow/compiler/xla/service/cpu/BUILD | 12 ++ .../cpu/custom_call_target_registry.cc | 39 ++++ .../service/cpu/custom_call_target_registry.h | 74 +++++++ .../xla/service/cpu/simple_orc_jit.cc | 195 ++++++++++-------- tensorflow/compiler/xla/tests/BUILD | 3 +- .../compiler/xla/tests/custom_call_test.cc | 14 +- tensorflow/compiler/xla/xla.bzl | 8 - 12 files changed, 266 insertions(+), 96 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/custom_call_target_registry.cc create mode 100644 tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 6a0c4fef75..915c95e945 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -5,7 +5,6 @@ package( ) load("//tensorflow:tensorflow.bzl", "tf_kernel_library") -load("//tensorflow/compiler/xla:xla.bzl", "export_dynamic_linkopts") tf_kernel_library( name = "xla_ops", @@ -155,6 +154,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//tensorflow/compiler/tf2xla:xla_local_runtime_context", + "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", "//tensorflow/core:framework_lite", "//tensorflow/core/kernels:bounds_check", 
"//tensorflow/core/kernels:gather_functor_hdr", @@ -169,6 +169,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//tensorflow/compiler/tf2xla:xla_local_runtime_context", + "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", "//tensorflow/core:framework_lite", "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/kernels:gather_functor_hdr", @@ -182,6 +183,7 @@ cc_library( srcs = ["index_ops_kernel_argmax_float_1d.cc"], visibility = ["//visibility:public"], deps = [ + "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", "//tensorflow/core:framework_lite", "//third_party/eigen3", ], @@ -193,6 +195,7 @@ cc_library( srcs = ["index_ops_kernel_argmax_float_2d.cc"], visibility = ["//visibility:public"], deps = [ + "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", "//tensorflow/core:framework_lite", "//third_party/eigen3", ], diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int32.cc b/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int32.cc index 33b1b087d0..0b44e0c6f8 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int32.cc +++ b/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int32.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h" +#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/gather_functor.h" #include "tensorflow/core/platform/dynamic_annotations.h" @@ -70,3 +71,5 @@ EIGEN_STRONG_INLINE void gather_float_int32_xla_impl(float* out, void** data) { extern "C" void TF_EXPORT gather_float_int32_xla_impl(float* out, void** data) { tensorflow::gather_float_int32_xla_impl(out, data); } + +REGISTER_CUSTOM_CALL_TARGET(gather_float_int32_xla_impl); diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int64.cc b/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int64.cc index 5e2d872ce0..d7c7a7bf2c 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int64.cc +++ b/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int64.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h" +#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/gather_functor.h" #include "tensorflow/core/platform/dynamic_annotations.h" @@ -70,3 +71,5 @@ EIGEN_STRONG_INLINE void gather_float_int64_xla_impl(float* out, void** data) { extern "C" void TF_EXPORT gather_float_int64_xla_impl(float* out, void** data) { tensorflow::gather_float_int64_xla_impl(out, data); } + +REGISTER_CUSTOM_CALL_TARGET(gather_float_int64_xla_impl); diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_1d.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_1d.cc index afbd64ca50..47cf8c6675 100644 --- a/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_1d.cc +++ b/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_1d.cc @@ -16,6 +16,7 @@ limitations under the License. 
#define EIGEN_USE_THREADS #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/dynamic_annotations.h" #include "tensorflow/core/platform/macros.h" @@ -47,3 +48,5 @@ EIGEN_STRONG_INLINE void argmax_float_1d_xla_impl(void* out, void** data) { extern "C" void TF_EXPORT argmax_float_1d_xla_impl(void* out, void** data) { tensorflow::argmax_float_1d_xla_impl(out, data); } + +REGISTER_CUSTOM_CALL_TARGET(argmax_float_1d_xla_impl); diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_2d.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_2d.cc index 841ff2f4df..9b83392d8f 100644 --- a/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_2d.cc +++ b/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_2d.cc @@ -16,6 +16,7 @@ limitations under the License. #define EIGEN_USE_THREADS #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/dynamic_annotations.h" #include "tensorflow/core/platform/macros.h" @@ -49,3 +50,5 @@ EIGEN_STRONG_INLINE void argmax_float_2d_xla_impl(void* out, void** data) { extern "C" void TF_EXPORT argmax_float_2d_xla_impl(void* out, void** data) { tensorflow::argmax_float_2d_xla_impl(out, data); } + +REGISTER_CUSTOM_CALL_TARGET(argmax_float_2d_xla_impl); diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index fa6e5b2313..5d13b82427 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -129,6 +129,7 @@ cc_library( ":cpu_runtime_avx", ":cpu_runtime_neon", ":cpu_runtime_sse4_1", + ":custom_call_target_registry", ":disassembler", ":runtime_conv2d", 
":runtime_matmul", @@ -674,6 +675,17 @@ cc_library( ], ) +cc_library( + name = "custom_call_target_registry", + srcs = [ + "custom_call_target_registry.cc", + ], + hdrs = [ + "custom_call_target_registry.h", + ], + visibility = ["//visibility:public"], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.cc b/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.cc new file mode 100644 index 0000000000..5f5803874b --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.cc @@ -0,0 +1,39 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" + +namespace xla { +namespace cpu { + +CustomCallTargetRegistry* CustomCallTargetRegistry::Global() { + static auto* registry = new CustomCallTargetRegistry; + return registry; +} + +void CustomCallTargetRegistry::Register(const std::string& symbol, + void* address) { + std::lock_guard lock(mu_); + registered_symbols_[symbol] = address; +} + +void* CustomCallTargetRegistry::Lookup(const std::string& symbol) const { + std::lock_guard lock(mu_); + auto it = registered_symbols_.find(symbol); + return it == registered_symbols_.end() ? 
nullptr : it->second; +} + +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h b/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h new file mode 100644 index 0000000000..2994642356 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h @@ -0,0 +1,74 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CUSTOM_CALL_TARGET_REGISTRY_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CUSTOM_CALL_TARGET_REGISTRY_H_ + +// This file is depended on by kernels that have to build for mobile devices. +// For this reason, we avoid relying on TensorFlow and instead only use the +// standard C++ library. + +#include // NOLINT +#include +#include + +namespace xla { +namespace cpu { + +// The CPU JIT compiler uses this registry to resolve symbolic CustomCall +// targets; so when using the CPU JIT, CustomCall targets need to be registered +// here with the symbol name used in the CustomCall. +// +// The XLA AOT compiler links using a standard offline linker; so when compiling +// in AOT mode, you *also* need to make sure the name of the callee (presumably +// implemented in C++) matches up with the symbolic name used in the CustomCall. 
+// +// We maintain the registry in both the JIT and the AOT cases for simplicity, +// but we only use it when running in JIT mode. +class CustomCallTargetRegistry { + public: + static CustomCallTargetRegistry* Global(); + + void Register(const std::string& symbol, void* address); + void* Lookup(const std::string& symbol) const; + + private: + std::unordered_map registered_symbols_; + mutable std::mutex mu_; +}; + +class RegisterCustomCallTarget { + public: + explicit RegisterCustomCallTarget(const std::string& name, void* address) { + CustomCallTargetRegistry::Global()->Register(name, address); + } +}; + +#define REGISTER_CUSTOM_CALL_CONCAT(a, b) a##b + +#define REGISTER_CUSTOM_CALL_TARGET_WITH_SYM_HELPER(symbol, address, counter) \ + static ::xla::cpu::RegisterCustomCallTarget REGISTER_CUSTOM_CALL_CONCAT( \ + custom_call_target_register, counter)(symbol, \ + reinterpret_cast(address)) + +#define REGISTER_CUSTOM_CALL_TARGET_WITH_SYM(symbol, address) \ + REGISTER_CUSTOM_CALL_TARGET_WITH_SYM_HELPER(symbol, address, __COUNTER__) + +#define REGISTER_CUSTOM_CALL_TARGET(function) \ + REGISTER_CUSTOM_CALL_TARGET_WITH_SYM(#function, function) + +} // namespace cpu +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CUSTOM_CALL_TARGET_REGISTRY_H_ diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index c3c11df090..0711c9de27 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -31,6 +31,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h" #include "tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h" #include "tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h" +#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h" @@ -42,90 +43,10 @@ namespace xla { namespace cpu { namespace { -// Converts a symbol 'name' into the form expected by dlsym(). -std::string CanonicalizeSymbol(const std::string& name) { -#if defined(__APPLE__) - // On Mac OS X, dlsym() expects names not to be prefixed with a leading - // underscore. - if (!name.empty() && name.front() == '_') { - return name.substr(1); - } -#endif - return name; -} - -class JITSymbolTable { - public: - JITSymbolTable() { Populate(); } - - void* Lookup(llvm::StringRef jit_symbol_name) const { - auto it = jit_symbol_table_.find(jit_symbol_name); - return it == jit_symbol_table_.end() ? nullptr : it->getValue(); - } - - static bool MustBeInTable(llvm::StringRef name) { - // In particular, names starting with - // runtime::kXlaCpuRuntimeSymbolNamePrefix should not be dlsym'ed. - return name.startswith(runtime::kXlaCpuRuntimeSymbolNamePrefix); - } - - private: - void AddJITSymbolToTable(llvm::StringRef jit_symbol_name, - llvm::StringRef cpp_symbol_name, - void* jit_symbol_value) { - // The JIT symbol name and the C++ symbol name (with an extern "C" linkage) - // need to match, otherwise AOT links will fail. 
- CHECK(jit_symbol_name == cpp_symbol_name); - CHECK(jit_symbol_table_.insert({jit_symbol_name, jit_symbol_value}).second); - } - - void Populate() { -#define ADD_JIT_SYMBOL_TO_TABLE(base_name) \ - do { \ - AddJITSymbolToTable( \ - xla::cpu::runtime::k##base_name##SymbolName, \ - "__xla_cpu_runtime_" #base_name, \ - reinterpret_cast(__xla_cpu_runtime_##base_name)); \ - } while (false) - - ADD_JIT_SYMBOL_TO_TABLE(AcquireInfeedBufferForDequeue); - ADD_JIT_SYMBOL_TO_TABLE(ReleaseInfeedBufferAfterDequeue); - ADD_JIT_SYMBOL_TO_TABLE(AcquireOutfeedBufferForPopulation); - ADD_JIT_SYMBOL_TO_TABLE(ReleaseOutfeedBufferAfterPopulation); - ADD_JIT_SYMBOL_TO_TABLE(ExpV8F32AVX); - ADD_JIT_SYMBOL_TO_TABLE(LogV8F32AVX); - ADD_JIT_SYMBOL_TO_TABLE(ExpV4F32SSE); - ADD_JIT_SYMBOL_TO_TABLE(LogV4F32SSE); - ADD_JIT_SYMBOL_TO_TABLE(ExpV4F32NEON); - ADD_JIT_SYMBOL_TO_TABLE(LogV4F32NEON); - ADD_JIT_SYMBOL_TO_TABLE(EigenConvF32); - ADD_JIT_SYMBOL_TO_TABLE(EigenMatMulF32); - ADD_JIT_SYMBOL_TO_TABLE(EigenMatMulF64); - ADD_JIT_SYMBOL_TO_TABLE(EigenSingleThreadedConvF32); - ADD_JIT_SYMBOL_TO_TABLE(EigenSingleThreadedMatMulF32); - ADD_JIT_SYMBOL_TO_TABLE(EigenSingleThreadedMatMulF64); - -#undef ADD_JIT_SYMBOL_TO_TABLE - } - - llvm::StringMap jit_symbol_table_; -}; - -const JITSymbolTable& GetJITSymbolTable() { - static JITSymbolTable* symbol_table = new JITSymbolTable; - return *symbol_table; -} - // A simple SymbolResolver that delegates to the host dynamic linker. struct SimpleResolver : public llvm::JITSymbolResolver { llvm::JITSymbol findSymbol(const std::string& name) override { - std::string canonical_name = CanonicalizeSymbol(name); - const JITSymbolTable& jit_symbol_table = GetJITSymbolTable(); - - void* func_addr = JITSymbolTable::MustBeInTable(canonical_name) - ? 
jit_symbol_table.Lookup(canonical_name) - : dlsym(RTLD_DEFAULT, canonical_name.c_str()); - + void* func_addr = CustomCallTargetRegistry::Global()->Lookup(name); if (func_addr == nullptr) { return nullptr; } @@ -238,5 +159,117 @@ llvm::JITSymbol SimpleOrcJIT::FindSymbol(const std::string& name) { return nullptr; } +namespace { +// Register some known symbols with the CustomCallTargetRegistry. +bool RegisterKnownJITSymbols() { + CustomCallTargetRegistry* registry = CustomCallTargetRegistry::Global(); + +#define REGISTER_CPU_RUNTIME_SYMBOL(base_name) \ + do { \ + auto* function_address = \ + reinterpret_cast(__xla_cpu_runtime_##base_name); \ + registry->Register(xla::cpu::runtime::k##base_name##SymbolName, \ + function_address); \ + CHECK_EQ( \ + tensorflow::StringPiece(xla::cpu::runtime::k##base_name##SymbolName), \ + "__xla_cpu_runtime_" #base_name); \ + } while (false) + + REGISTER_CPU_RUNTIME_SYMBOL(AcquireInfeedBufferForDequeue); + REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue); + REGISTER_CPU_RUNTIME_SYMBOL(AcquireOutfeedBufferForPopulation); + REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation); + REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX); + REGISTER_CPU_RUNTIME_SYMBOL(LogV8F32AVX); + REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE); + REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32SSE); + REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32NEON); + REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32NEON); + REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF32); + REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32); + REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64); + REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); + REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); + REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); + +#undef REGISTER_CPU_RUNTIME_SYMBOL + +#define REGISTER_LIBM_SYMBOL(name) \ + do { \ + /* Register both the F32 and F64 variants of the libm symbol. 
*/ \ + registry->Register(#name "f", reinterpret_cast(name##f)); \ + registry->Register(#name, reinterpret_cast(name)); \ + } while (false) + + REGISTER_LIBM_SYMBOL(acos); + REGISTER_LIBM_SYMBOL(acosh); + REGISTER_LIBM_SYMBOL(asin); + REGISTER_LIBM_SYMBOL(asinh); + REGISTER_LIBM_SYMBOL(atan); + REGISTER_LIBM_SYMBOL(atan2); + REGISTER_LIBM_SYMBOL(atanh); + REGISTER_LIBM_SYMBOL(cbrt); + REGISTER_LIBM_SYMBOL(ceil); + REGISTER_LIBM_SYMBOL(copysign); + REGISTER_LIBM_SYMBOL(cos); + REGISTER_LIBM_SYMBOL(cosh); + REGISTER_LIBM_SYMBOL(erf); + REGISTER_LIBM_SYMBOL(erfc); + REGISTER_LIBM_SYMBOL(exp); + REGISTER_LIBM_SYMBOL(exp2); + REGISTER_LIBM_SYMBOL(expm1); + REGISTER_LIBM_SYMBOL(fabs); + REGISTER_LIBM_SYMBOL(fdim); + REGISTER_LIBM_SYMBOL(floor); + REGISTER_LIBM_SYMBOL(fma); + REGISTER_LIBM_SYMBOL(fmax); + REGISTER_LIBM_SYMBOL(fmin); + REGISTER_LIBM_SYMBOL(fmod); + REGISTER_LIBM_SYMBOL(frexp); + REGISTER_LIBM_SYMBOL(hypot); + REGISTER_LIBM_SYMBOL(ilogb); + REGISTER_LIBM_SYMBOL(ldexp); + REGISTER_LIBM_SYMBOL(lgamma); + REGISTER_LIBM_SYMBOL(llrint); + REGISTER_LIBM_SYMBOL(llround); + REGISTER_LIBM_SYMBOL(log); + REGISTER_LIBM_SYMBOL(log10); + REGISTER_LIBM_SYMBOL(log1p); + REGISTER_LIBM_SYMBOL(log2); + REGISTER_LIBM_SYMBOL(logb); + REGISTER_LIBM_SYMBOL(lrint); + REGISTER_LIBM_SYMBOL(lround); + REGISTER_LIBM_SYMBOL(modf); + REGISTER_LIBM_SYMBOL(nan); + REGISTER_LIBM_SYMBOL(nearbyint); + REGISTER_LIBM_SYMBOL(nextafter); + REGISTER_LIBM_SYMBOL(nexttoward); + REGISTER_LIBM_SYMBOL(pow); + REGISTER_LIBM_SYMBOL(remainder); + REGISTER_LIBM_SYMBOL(remquo); + REGISTER_LIBM_SYMBOL(rint); + REGISTER_LIBM_SYMBOL(round); + REGISTER_LIBM_SYMBOL(scalbln); + REGISTER_LIBM_SYMBOL(scalbn); + REGISTER_LIBM_SYMBOL(sin); + REGISTER_LIBM_SYMBOL(sincos); + REGISTER_LIBM_SYMBOL(sinh); + REGISTER_LIBM_SYMBOL(sqrt); + REGISTER_LIBM_SYMBOL(tan); + REGISTER_LIBM_SYMBOL(tanh); + REGISTER_LIBM_SYMBOL(tgamma); + REGISTER_LIBM_SYMBOL(trunc); + +#undef REGISTER_LIBM_SYMBOL + + registry->Register("memcpy", 
reinterpret_cast(memcpy)); + registry->Register("memmove", reinterpret_cast(memmove)); + registry->Register("memset", reinterpret_cast(memset)); + return true; +} + +bool unused = RegisterKnownJITSymbols(); +} // namespace + } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index e45b839afd..84bebd4708 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -23,7 +23,6 @@ filegroup( ]), ) -load("//tensorflow/compiler/xla:xla.bzl", "export_dynamic_linkopts") load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test") load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test_library") load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_suites") @@ -981,13 +980,13 @@ xla_test( xla_test( name = "custom_call_test", srcs = ["custom_call_test.cc"], - linkopts = export_dynamic_linkopts, deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc index 342478bc74..74f73a1ddc 100644 --- a/tensorflow/compiler/xla/tests/custom_call_test.cc +++ b/tensorflow/compiler/xla/tests/custom_call_test.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -31,19 +32,19 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/test.h" - -extern "C" void TF_EXPORT R0F32Add2(float* out, float** in) { +namespace { +void R0F32Add2(float* out, float** in) { TF_ANNOTATE_MEMORY_IS_INITIALIZED(in, sizeof(float*)); *out = **in + 2.0f; } -extern "C" void TF_EXPORT R2F32ReduceSum(float* out, float** in) { +void R2F32ReduceSum(float* out, float** in) { TF_ANNOTATE_MEMORY_IS_INITIALIZED(in, sizeof(float) * 4); float* array = in[0]; *out = array[0] + array[1] + array[2] + array[3]; } -extern "C" void TF_EXPORT Add1ToValues(float* out, float** in) { +void Add1ToValues(float* out, float** in) { TF_ANNOTATE_MEMORY_IS_INITIALIZED(in, sizeof(float) * 4); float* array = in[0]; out[0] = array[0] + 1; @@ -51,6 +52,11 @@ extern "C" void TF_EXPORT Add1ToValues(float* out, float** in) { out[2] = array[2] + 1; out[3] = array[3] + 1; } +} // namespace + +REGISTER_CUSTOM_CALL_TARGET(R0F32Add2); +REGISTER_CUSTOM_CALL_TARGET(R2F32ReduceSum); +REGISTER_CUSTOM_CALL_TARGET(Add1ToValues); namespace xla { namespace { diff --git a/tensorflow/compiler/xla/xla.bzl b/tensorflow/compiler/xla/xla.bzl index 22e70ec97a..3fa5bcc1df 100644 --- a/tensorflow/compiler/xla/xla.bzl +++ b/tensorflow/compiler/xla/xla.bzl @@ -17,11 +17,3 @@ def xla_proto_library(name, srcs=[], deps=[], visibility=None, testonly=0): protoc="@protobuf_archive//:protoc", testonly=testonly, visibility=visibility,) - -# Flags required for modules that export symbols that are to be called by the -# XLA CustomCall operator. 
CustomCall must be able to find symbols with dlsym(), -# which on Linux requires we link with --export-dynamic. -export_dynamic_linkopts = select({ - "//tensorflow:darwin": [], - "//conditions:default": ["-Wl,--export-dynamic"], -}) -- GitLab From 875df6262977eebd73d558600c5a216882b88164 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 4 Oct 2017 17:25:04 -0700 Subject: [PATCH 028/909] [XLA:CPU] Mark loads of parameter addresses as invariant. Also delete a dead member in the IrEmitter, make param names match between the header and the cc file, and make a cosmetic comment fix. PiperOrigin-RevId: 171088993 --- tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 12 ++++++++++-- tensorflow/compiler/xla/service/cpu/ir_emitter.h | 12 ++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 1e81a815d8..8b777bcf84 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1457,6 +1457,14 @@ Status IrEmitter::HandleParameter(HloInstruction* parameter) { llvm_ir::EmitBufferIndexingGEP(params, param_number, &ir_builder_); llvm::LoadInst* param_address_untyped = ir_builder_.CreateLoad(param_address_offset); + if (hlo_module_config_.debug_options() + .xla_llvm_enable_invariant_load_metadata()) { + // We never reassign parameters, so this load is invariant. 
+ param_address_untyped->setMetadata( + llvm::LLVMContext::MD_invariant_load, + llvm::MDNode::get(param_address_untyped->getContext(), /*MDs=*/{})); + } + llvm::Value* param_address_typed = ir_builder_.CreateBitCast( param_address_untyped, IrShapeType(param_shape)->getPointerTo()); emitted_value_[parameter] = param_address_typed; @@ -2924,8 +2932,8 @@ llvm::Value* IrEmitter::EmitTempBufferPointer( ir_builder_.CreateLoad(tempbuf_address_ptr); if (hlo_module_config_.debug_options() .xla_llvm_enable_invariant_load_metadata()) { - // Loading the address of a buffer is invariant of the point at which the - // load is executed in the program because we never reassign buffers. + // Loading the address of a buffer is invariant of the point at which the + // load is executed in the program because we never reassign buffers. tempbuf_address_base->setMetadata( llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(tempbuf_address_base->getContext(), /*MDs=*/{})); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 8042e03e69..05663b6038 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -146,7 +146,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { // // Default action which emits code for most operations. Operations which are // special in some way are handled explicitly in HandleFoo methods. 
- Status DefaultAction(HloInstruction* hlo_instruction) override; + Status DefaultAction(HloInstruction* hlo) override; Status HandleBitcast(HloInstruction* bitcast) override; Status HandleConstant(HloInstruction* constant, @@ -175,7 +175,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status HandleReduceWindow(HloInstruction* reduce_window, HloInstruction* operand, const Window& window, HloComputation* function) override; - Status HandleSelectAndScatter(HloInstruction* instruction) override; + Status HandleSelectAndScatter(HloInstruction* select_and_scatter) override; Status HandleSend(HloInstruction* send) override; Status HandleSlice(HloInstruction* slice, HloInstruction* /*operand*/) override; @@ -208,7 +208,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status FinishVisit(HloInstruction* root) override; Status Preprocess(HloInstruction* hlo) override; - Status Postprocess(HloInstruction* visited) override; + Status Postprocess(HloInstruction* hlo) override; private: // Private helper to initialize an IR function for the computation. @@ -304,7 +304,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { void EmitArrayFunctionCallInto( llvm::Function* function, tensorflow::gtl::ArraySlice parameter_addresses, - llvm::Value* return_value, tensorflow::StringPiece name); + llvm::Value* return_value_buffer, tensorflow::StringPiece name); // Array function call emitter. Returns a Value for the function's return // value buffer address. The return value buffer is alloca'ed by this @@ -447,10 +447,6 @@ class IrEmitter : public DfsHloVisitorWithDefault { const llvm_ir::IrArray& target_array, const llvm_ir::IrArray& source_array); - // Name of the computation entry function. This function serves as the - // top-level "main" of the computation and will be invoked by the JIT. - string entry_function_name_; - // Assignment of the temporary buffers needed by the computation and their // shape information. 
const BufferAssignment& assignment_; -- GitLab From fa86731b3dd081cf437fbeecbfcae30596c2873b Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Wed, 4 Oct 2017 17:26:34 -0700 Subject: [PATCH 029/909] Automated g4 rollback of changelist 171070760 PiperOrigin-RevId: 171089134 --- tensorflow/python/estimator/exporter.py | 26 +------ tensorflow/python/estimator/exporter_test.py | 41 +--------- tensorflow/python/estimator/training.py | 37 +++------ tensorflow/python/estimator/training_test.py | 81 -------------------- 4 files changed, 16 insertions(+), 169 deletions(-) diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py index 2faca11f6e..505820dd93 100644 --- a/tensorflow/python/estimator/exporter.py +++ b/tensorflow/python/estimator/exporter.py @@ -40,8 +40,7 @@ class Exporter(object): pass @abc.abstractmethod - def export(self, estimator, export_path, checkpoint_path, eval_result, - is_the_final_export): + def export(self, estimator, export_path, checkpoint_path, eval_result): """Exports the given `Estimator` to a specific format. Args: @@ -49,13 +48,6 @@ class Exporter(object): export_path: A string containing a directory where to write the export. checkpoint_path: The checkpoint path to export. eval_result: The output of `Estimator.evaluate` on this checkpoint. - is_the_final_export: This boolean is True when this is an export in the - end of training. It is False for the intermediate exports during - the training. - - When passing `Exporter` to `tf.estimator.train_and_evaluate` - `is_the_final_export` is always False if `TrainSpec.max_steps` is - `None`. Returns: The string path to the exported directory or `None` if export is skipped. @@ -74,8 +66,7 @@ class LatestExporter(Exporter): serving_input_fn, assets_extra=None, as_text=False, - exports_to_keep=5, - only_the_final_export=False): + exports_to_keep=5): """Create an `Exporter` to use with `tf.estimator.EvalSpec`. 
Args: @@ -95,8 +86,6 @@ class LatestExporter(Exporter): exports_to_keep: Number of exports to keep. Older exports will be garbage-collected. Defaults to 5. Set to `None` to disable garbage collection. - only_the_final_export: Only the final export in the end of training will - happen if this is set to True. Raises: ValueError: if any arguments is invalid. @@ -106,8 +95,6 @@ class LatestExporter(Exporter): self._assets_extra = assets_extra self._as_text = as_text self._exports_to_keep = exports_to_keep - self._only_the_final_export = only_the_final_export - if exports_to_keep is not None and exports_to_keep <= 0: raise ValueError( '`exports_to_keep`, if provided, must be positive number') @@ -116,14 +103,7 @@ class LatestExporter(Exporter): def name(self): return self._name - def export(self, estimator, export_path, checkpoint_path, eval_result, - is_the_final_export): - if not is_the_final_export and self._only_the_final_export: - return None - - if is_the_final_export: - tf_logging.info('Performing the final export in the end of training.') - + def export(self, estimator, export_path, checkpoint_path, eval_result): export_result = estimator.export_savedmodel( export_path, self._serving_input_fn, diff --git a/tensorflow/python/estimator/exporter_test.py b/tensorflow/python/estimator/exporter_test.py index 01582ac595..2ceff1bfd6 100644 --- a/tensorflow/python/estimator/exporter_test.py +++ b/tensorflow/python/estimator/exporter_test.py @@ -42,7 +42,7 @@ class LatestExporterTest(test.TestCase): serving_input_fn=_serving_input_fn, exports_to_keep=0) - def test_latest_exporter(self): + def test_saved_model_exporter(self): def _serving_input_fn(): pass @@ -60,42 +60,7 @@ class LatestExporterTest(test.TestCase): estimator.export_savedmodel.return_value = "export_result_path" export_result = exporter.export(estimator, export_dir_base, - "checkpoint_path", {}, False) - - self.assertEqual("export_result_path", export_result) - estimator.export_savedmodel.assert_called_with( 
- export_dir_base, - _serving_input_fn, - assets_extra={"from/path": "to/path"}, - as_text=False, - checkpoint_path="checkpoint_path") - - def test_only_the_last_export_is_saved(self): - - def _serving_input_fn(): - pass - - export_dir_base = tempfile.mkdtemp() + "export/" - gfile.MkDir(export_dir_base) - - exporter = exporter_lib.LatestExporter( - name="latest_exporter", - serving_input_fn=_serving_input_fn, - assets_extra={"from/path": "to/path"}, - as_text=False, - exports_to_keep=5, - only_the_final_export=True) - estimator = test.mock.Mock(spec=estimator_lib.Estimator) - estimator.export_savedmodel.return_value = "export_result_path" - - export_result = exporter.export(estimator, export_dir_base, - "checkpoint_path", {}, False) - - self.assertFalse(estimator.export_savedmodel.called) - self.assertEqual(None, export_result) - - export_result = exporter.export(estimator, export_dir_base, - "checkpoint_path", {}, True) + "checkpoint_path", {}) self.assertEqual("export_result_path", export_result) estimator.export_savedmodel.assert_called_with( @@ -128,7 +93,7 @@ class LatestExporterTest(test.TestCase): estimator = test.mock.Mock(spec=estimator_lib.Estimator) # Garbage collect all but the most recent 2 exports, # where recency is determined based on the timestamp directory names. 
- exporter.export(estimator, export_dir_base, None, None, False) + exporter.export(estimator, export_dir_base, None, None) self.assertFalse(gfile.Exists(export_dir_1)) self.assertFalse(gfile.Exists(export_dir_2)) diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 0a558a67b9..1bed19760b 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -519,11 +519,8 @@ class _TrainingExecutor(object): class NewCheckpointListener( basic_session_run_hooks.CheckpointSaverListener): - def __init__(self, estimator, eval_spec, max_training_steps): - # pylint: disable=protected-access - self._evaluator = _TrainingExecutor._Evaluator(estimator, eval_spec, - max_training_steps) - # pylint: enable=protected-access + def __init__(self, estimator, eval_spec): + self._evaluator = _TrainingExecutor._Evaluator(estimator, eval_spec) # pylint: disable=protected-access def after_save(self, session, global_step_value): del session, global_step_value @@ -531,10 +528,8 @@ class _TrainingExecutor(object): # When the underlying `Estimator` object saves a new checkpoint, we would # like this callback to be called so that evaluation and export can trigger. 
- saving_listeners = [ - NewCheckpointListener(self._estimator, self._eval_spec, - self._train_spec.max_steps) - ] + saving_listeners = [NewCheckpointListener(self._estimator, self._eval_spec)] + return self._start_distributed_training(saving_listeners=saving_listeners) def run_evaluator(self): @@ -571,8 +566,7 @@ class _TrainingExecutor(object): 'after {} secs (eval_spec.throttle_secs) or training is ' 'finished.'.format(self._eval_spec.throttle_secs)) - evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec, - self._train_spec.max_steps) + evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec) while True: self._estimator.train( @@ -642,8 +636,7 @@ class _TrainingExecutor(object): time.sleep(start_delay_secs) latest_eval_result = None - evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec, - self._train_spec.max_steps) + evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec) while True: if latest_eval_result: @@ -670,12 +663,11 @@ class _TrainingExecutor(object): class _Evaluator(object): """A helper class to call `Estimator.evaluate` and export model.""" - def __init__(self, estimator, eval_spec, max_training_steps): + def __init__(self, estimator, eval_spec): self._estimator = estimator self._eval_spec = eval_spec self._previous_ckpt_path = None self._last_warning_time = 0 - self._max_training_steps = max_training_steps def evaluate_and_export(self): """Evaluate and (maybe) export the current model. @@ -720,14 +712,7 @@ class _TrainingExecutor(object): 'Internal error: `Estimator.evaluate` result should have ' '`global_step` in result. Given {}'.format(eval_result)) - # TODO(isaprykin): There is a potential race condition here in the - # distributed setting. The worker job that performs training - # might stop at a later global step value than the evalutor job. 
- is_the_final_export = (eval_result[ops.GraphKeys.GLOBAL_STEP] >= - self._max_training_steps - if self._max_training_steps else False) - self._export_eval_result(eval_result, latest_ckpt_path, - is_the_final_export) + self._export_eval_result(eval_result, latest_ckpt_path) self._last_warning_time = 0 self._previous_ckpt_path = latest_ckpt_path @@ -740,8 +725,7 @@ class _TrainingExecutor(object): logging.warning(message) self._last_warning_time = current_time - def _export_eval_result(self, eval_result, checkpoint_path, - is_the_final_export): + def _export_eval_result(self, eval_result, checkpoint_path): """Export `eval_result` according to exporters in `EvalSpec`.""" export_dir_base = os.path.join( compat.as_str_any(self._estimator.model_dir), @@ -754,5 +738,4 @@ class _TrainingExecutor(object): compat.as_str_any(export_dir_base), compat.as_str_any(exporter.name)), checkpoint_path=checkpoint_path, - eval_result=eval_result, - is_the_final_export=is_the_final_export) + eval_result=eval_result) diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py index 08d11d7d25..e4c400ca7f 100644 --- a/tensorflow/python/estimator/training_test.py +++ b/tensorflow/python/estimator/training_test.py @@ -802,46 +802,6 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase): self.assertEqual(2, mock_est.evaluate.call_count) self.assertEqual(2, exporter.export.call_count) - def test_final_export_is_true_in_the_end(self): - training_max_step = 200 - - mock_est = test.mock.Mock(spec=estimator_lib.Estimator) - mock_est.model_dir = compat.as_bytes(test.get_temp_dir()) - mock_est.evaluate.side_effect = [ - {_GLOBAL_STEP_KEY: training_max_step // 2}, - {_GLOBAL_STEP_KEY: training_max_step} - ] - mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2'] - - mock_train_spec = test.mock.Mock(spec=training.TrainSpec) - mock_train_spec.max_steps = training_max_step - - mock_est.times_export_fn_was_called = 0 - 
mock_est.times_the_final_export_was_true = 0 - def export(estimator, export_path, checkpoint_path, eval_result, - is_the_final_export): - del export_path, checkpoint_path, eval_result - estimator.times_export_fn_was_called += 1 - if is_the_final_export: - estimator.times_the_final_export_was_true += 1 - - exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) - exporter.name = 'see_how_many_times_export_is_called' - exporter.export = export - - eval_spec = training.EvalSpec( - input_fn=lambda: 1, - start_delay_secs=0, - throttle_secs=0, - exporters=exporter) - - executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec) - executor.run_evaluator() - - self.assertEqual(2, mock_est.evaluate.call_count) - self.assertEqual(2, mock_est.times_export_fn_was_called) - self.assertEqual(1, mock_est.times_the_final_export_was_true) - def test_skip_evaluation_due_to_ckpt(self): training_max_step = 200 mock_est = test.mock.Mock(spec=estimator_lib.Estimator) @@ -1174,47 +1134,6 @@ class TrainingExecutorRunLocalTest(test.TestCase): with self.assertRaisesRegexp(RuntimeError, _STALE_CHECKPOINT_MSG): executor.run_local() - def test_final_export_is_true_in_the_end(self): - mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') - mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn - - mock_est.times_export_fn_was_called = 0 - mock_est.times_the_final_export_was_true = 0 - def export(estimator, export_path, checkpoint_path, eval_result, - is_the_final_export): - del export_path, checkpoint_path, eval_result - estimator.times_export_fn_was_called += 1 - if is_the_final_export: - estimator.times_the_final_export_was_true += 1 - - exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) - exporter.name = 'see_how_many_times_export_is_called' - exporter.export = export - - train_spec = training.TrainSpec( - input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()]) - eval_spec = training.EvalSpec( - input_fn=lambda: 1, - 
hooks=[_FakeHook()], - throttle_secs=100, - exporters=exporter) - # should be called 3 times. - mock_est.evaluate.side_effect = [{ - _GLOBAL_STEP_KEY: train_spec.max_steps - 100 - }, { - _GLOBAL_STEP_KEY: train_spec.max_steps - 50 - }, { - _GLOBAL_STEP_KEY: train_spec.max_steps - }] - - executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) - executor.run_local() - - self.assertEqual(3, mock_est.train.call_count) - self.assertEqual(3, mock_est.evaluate.call_count) - self.assertEqual(3, mock_est.times_export_fn_was_called) - self.assertEqual(1, mock_est.times_the_final_export_was_true) - def test_train_and_evaluate_args(self): mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') mock_est.latest_checkpoint.return_value = 'checkpoint_path/' -- GitLab From 466d84d2896336390e8dc1efeaaf5e385697b386 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Wed, 4 Oct 2017 17:39:52 -0700 Subject: [PATCH 030/909] [XLA] Avoid check-failure when passing bad reduce window arguments. 
PiperOrigin-RevId: 171090558 --- tensorflow/compiler/xla/client/BUILD | 2 ++ .../xla/client/computation_builder.cc | 16 +++++++++++--- tensorflow/compiler/xla/client/padding.cc | 21 +++++++++++++++++-- tensorflow/compiler/xla/client/padding.h | 11 +++++++++- .../compiler/xla/tests/reduce_window_test.cc | 14 +++++++++++++ 5 files changed, 58 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD index 2b142d933d..b612698143 100644 --- a/tensorflow/compiler/xla/client/BUILD +++ b/tensorflow/compiler/xla/client/BUILD @@ -41,7 +41,9 @@ cc_library( srcs = ["padding.cc"], hdrs = ["padding.h"], deps = [ + "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index a80412e951..15a713513f 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -1433,10 +1433,20 @@ ComputationDataHandle ComputationBuilder::ReduceWindow( return ComputationDataHandle(); } - return ReduceWindowWithGeneralPadding( - operand, init_value, computation, window_dimensions, window_strides, + Status padding_valid = + ValidatePaddingValues(AsInt64Slice(shape.ValueOrDie()->dimensions()), + window_dimensions, window_strides); + if (!padding_valid.ok()) { + first_error_ = padding_valid; + return ComputationDataHandle(); + } + + std::vector> padding_values = MakePadding(AsInt64Slice(shape.ValueOrDie()->dimensions()), - window_dimensions, window_strides, padding)); + window_dimensions, window_strides, padding); + return ReduceWindowWithGeneralPadding(operand, init_value, computation, + window_dimensions, window_strides, + padding_values); } ComputationDataHandle ComputationBuilder::ReduceWindowWithGeneralPadding( diff --git a/tensorflow/compiler/xla/client/padding.cc 
b/tensorflow/compiler/xla/client/padding.cc index 0b18d8946a..6a9cf466ac 100644 --- a/tensorflow/compiler/xla/client/padding.cc +++ b/tensorflow/compiler/xla/client/padding.cc @@ -17,17 +17,34 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/platform/logging.h" namespace xla { +Status ValidatePaddingValues( + tensorflow::gtl::ArraySlice input_dimensions, + tensorflow::gtl::ArraySlice window_dimensions, + tensorflow::gtl::ArraySlice window_strides) { + bool ok = input_dimensions.size() == window_dimensions.size() && + input_dimensions.size() == window_strides.size(); + if (!ok) { + return InvalidArgument( + "Want input dimensions size %zu = window dimensions size %zu = window " + "strides size %zu", + input_dimensions.size(), window_dimensions.size(), + window_strides.size()); + } + return Status::OK(); +} + std::vector> MakePadding( tensorflow::gtl::ArraySlice input_dimensions, tensorflow::gtl::ArraySlice window_dimensions, tensorflow::gtl::ArraySlice window_strides, Padding padding) { - CHECK_EQ(input_dimensions.size(), window_dimensions.size()); - CHECK_EQ(input_dimensions.size(), window_strides.size()); + TF_CHECK_OK(ValidatePaddingValues(input_dimensions, window_dimensions, + window_strides)); std::vector> low_high_padding; switch (padding) { case Padding::kValid: diff --git a/tensorflow/compiler/xla/client/padding.h b/tensorflow/compiler/xla/client/padding.h index dce2d87e8d..e23b0b3a90 100644 --- a/tensorflow/compiler/xla/client/padding.h +++ b/tensorflow/compiler/xla/client/padding.h @@ -19,6 +19,7 @@ limitations under the License. 
#include #include +#include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -37,6 +38,14 @@ enum class Padding { kValid, }; +// Validates that the slices are acceptable for determining padding -- this can +// be used to check the preconditions of MakePadding below to produce an error +// message that can be returned to the user. +Status ValidatePaddingValues( + tensorflow::gtl::ArraySlice input_dimensions, + tensorflow::gtl::ArraySlice window_dimensions, + tensorflow::gtl::ArraySlice window_strides); + // Returns the padding needed for the base area, given the base area dimensions, // window dimensions, strides, and the type of padding. // @@ -51,7 +60,7 @@ enum class Padding { std::vector> MakePadding( tensorflow::gtl::ArraySlice input_dimensions, tensorflow::gtl::ArraySlice window_dimensions, - tensorflow::gtl::ArraySlice strides, Padding padding); + tensorflow::gtl::ArraySlice window_strides, Padding padding); } // namespace xla diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index 7b7f268728..6c9b62b48d 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -76,6 +76,20 @@ class ReduceWindowTest : public ClientLibraryTestBase { ComputationBuilder builder_; }; +TEST_F(ReduceWindowTest, MismatchedRanksGivesErrorStatus) { + const auto input = builder_.ConstantR1({1, 1, 1, 1}); + const auto init_value = builder_.ConstantR0(0); + TF_ASSERT_OK(builder_.first_error()); + builder_.ReduceWindow(input, init_value, + CreateScalarAddComputation(F32, &builder_), + /*window_dimensions=*/{1, 2}, + /*window_strides=*/{1}, Padding::kValid); + ASSERT_EQ(builder_.first_error().code(), tensorflow::error::INVALID_ARGUMENT) + << builder_.first_error(); + ASSERT_THAT(builder_.first_error().error_message(), + ::testing::HasSubstr("Want input dimensions size")); 
+} + TEST_F(ReduceWindowTest, Min3In5Stride2) { const auto input = builder_.ConstantR1({10000, 1000, 100, 10, 1}); ReduceWindowMin(input, {3}, {2}, Padding::kValid); -- GitLab From 578b9a29b252b4cbd57c2f6bdd9eaef4aae3e207 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Wed, 4 Oct 2017 18:24:54 -0700 Subject: [PATCH 031/909] Adds integration test for tf.estimator.train_and_evaluate. PiperOrigin-RevId: 171094690 --- tensorflow/python/estimator/BUILD | 6 +- tensorflow/python/estimator/training_test.py | 145 ++++++++++++++++++- 2 files changed, 149 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 9085ef419b..3507d9fedc 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -141,12 +141,15 @@ py_library( py_test( name = "training_test", - size = "small", + size = "medium", srcs = ["training_test.py"], + shard_count = 4, srcs_version = "PY2AND3", deps = [ + ":dnn", ":estimator", ":exporter", + ":inputs", ":run_config", ":training", "//tensorflow/python:client_testlib", @@ -155,6 +158,7 @@ py_test( "//tensorflow/python:platform", "//tensorflow/python:training", "//tensorflow/python:util", + "//tensorflow/python/feature_column", ], ) diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py index e4c400ca7f..51aed757a2 100644 --- a/tensorflow/python/estimator/training_test.py +++ b/tensorflow/python/estimator/training_test.py @@ -19,19 +19,32 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - +import glob import json +import os import random +import shutil +import tempfile import time +import numpy as np + from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.estimator import exporter as exporter_lib from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.estimator import training +from 
tensorflow.python.estimator.canned import dnn +from tensorflow.python.estimator.canned import prediction_keys +from tensorflow.python.estimator.export import export as export_lib +from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.summary import summary_iterator +from tensorflow.python.summary.writer import writer_cache from tensorflow.python.training import monitored_session from tensorflow.python.training import server_lib from tensorflow.python.training import session_run_hook @@ -1230,5 +1243,135 @@ class TrainingExecutorRunLocalTest(test.TestCase): executor.run_local() +class TrainAndEvaluateIntegrationTest(test.TestCase): + + def setUp(self): + self._model_dir = tempfile.mkdtemp() + + def tearDown(self): + if self._model_dir: + shutil.rmtree(self._model_dir) + + def _as_label(self, data_in_float): + return np.rint(data_in_float).astype(np.int64) + + def _get_exporter(self, name, fc): + feature_spec = feature_column.make_parse_example_spec(fc) + serving_input_receiver_fn = ( + export_lib.build_parsing_serving_input_receiver_fn(feature_spec)) + return exporter_lib.LatestExporter( + name, serving_input_fn=serving_input_receiver_fn) + + def _extract_loss_and_global_step(self, event_folder): + """Returns the loss and global step in last event.""" + event_paths = glob.glob(os.path.join(event_folder, 'events*')) + + loss = None + global_step_count = None + + for e in summary_iterator.summary_iterator(event_paths[-1]): + current_loss = None + for v in e.summary.value: + if v.tag == 'loss': + current_loss = v.simple_value + + # If loss is not found, global step is meaningless. 
+ if current_loss is None: + continue + + current_global_step = e.step + if global_step_count is None or current_global_step > global_step_count: + global_step_count = current_global_step + loss = current_loss + + return (loss, global_step_count) + + def test_complete_flow_with_non_distributed_configuration(self): + n_classes = 3 + input_dimension = 2 + batch_size = 10 + + eval_name = 'foo' + exporter_name = 'saved_model_exporter' + + # max_steps should be larger than save_summary_steps + max_steps = 10 + save_summary_steps = 2 + + data = np.linspace( + 0., n_classes - 1., batch_size * input_dimension, dtype=np.float32) + x_data = data.reshape(batch_size, input_dimension) + y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1)) + + # learn y = x + train_input_fn = numpy_io.numpy_input_fn( + x={'x': x_data}, + y=y_data, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + + eval_input_fn = numpy_io.numpy_input_fn( + x={'x': x_data}, + y=y_data, + batch_size=batch_size, + num_epochs=1, + shuffle=False) + + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': x_data}, + batch_size=batch_size, + shuffle=False) + + feature_columns = [ + feature_column.numeric_column('x', shape=(input_dimension,))] + + est = dnn.DNNClassifier( + hidden_units=(2, 2), + feature_columns=feature_columns, + n_classes=n_classes, + config=run_config_lib.RunConfig(save_summary_steps=save_summary_steps), + model_dir=self._model_dir) + + train_spec = training.TrainSpec(input_fn=train_input_fn, + max_steps=max_steps) + + eval_spec = training.EvalSpec( + name=eval_name, input_fn=eval_input_fn, steps=None, + exporters=self._get_exporter(exporter_name, feature_columns), + throttle_secs=2) + + training.train_and_evaluate(est, train_spec, eval_spec) + + # Make sure nothing is stuck in limbo. + writer_cache.FileWriterCache.clear() + + # Examine the training events. Use a range to check global step to avoid + # flakyness due to global step race condition. 
+ training_loss, training_global_step = self._extract_loss_and_global_step( + est.model_dir) + self.assertIsNotNone(training_loss) + self.assertTrue( + max_steps - save_summary_steps < training_global_step <= max_steps) + + # Examine the eval events. The global step should be accurate. + eval_loss, eval_global_step = self._extract_loss_and_global_step( + event_folder=os.path.join(est.model_dir, 'eval_' + eval_name)) + self.assertIsNotNone(eval_loss) + self.assertEqual(max_steps, eval_global_step) + + # Examine the export folder. + export_dir = os.path.join(os.path.join(est.model_dir, 'export'), + exporter_name) + self.assertTrue(gfile.Exists(export_dir)) + + # Examine the ckpt for predict. + predicted_proba = np.array([ + x[prediction_keys.PredictionKeys.PROBABILITIES] + for x in est.predict(predict_input_fn) + ]) + self.assertAllEqual((batch_size, n_classes), predicted_proba.shape) + + if __name__ == '__main__': test.main() -- GitLab From 2ae5bfce5519fc40019378280a6f26d36d924cf0 Mon Sep 17 00:00:00 2001 From: James Qin Date: Wed, 4 Oct 2017 18:31:16 -0700 Subject: [PATCH 032/909] Introduce CudnnRNN layers * Layerize CudnnRNN APIs * Support build(), call() APIs * Support building custom saveable() as a member method * Custom saveable built as part of build() * Support forward-compatible opaque param initialization w/ weight & bias initializer. * Add more documentation. Unittest revamp * Introduce CudnnTestModel class to build graph used by all unittests, avoid repeatedly building similar graphs. * Split tests by RNN types, for more explicit error localization. * Use custom gradient check routine which is cleaner. * Deleted golden-based inference tests since we use regular rnn as reference impl now. 
PiperOrigin-RevId: 171095161 --- tensorflow/contrib/cudnn_rnn/BUILD | 61 +- .../python/kernel_tests/cudnn_rnn_test.py | 1050 +++++++++++++++++ .../cudnn_rnn/python/layers/cudnn_rnn.py | 552 +++++++++ .../cudnn_rnn/python/ops/cudnn_rnn_ops.py | 111 +- 4 files changed, 1724 insertions(+), 50 deletions(-) create mode 100644 tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py create mode 100644 tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py diff --git a/tensorflow/contrib/cudnn_rnn/BUILD b/tensorflow/contrib/cudnn_rnn/BUILD index d4214587cd..ae9413fdd6 100644 --- a/tensorflow/contrib/cudnn_rnn/BUILD +++ b/tensorflow/contrib/cudnn_rnn/BUILD @@ -54,7 +54,7 @@ tf_gen_op_wrapper_py( ) tf_custom_op_py_library( - name = "cudnn_rnn_py", + name = "cudnn_rnn_ops_py", srcs = [ "__init__.py", "python/ops/cudnn_rnn_ops.py", @@ -81,10 +81,67 @@ tf_custom_op_py_library( ], ) +tf_custom_op_py_library( + name = "cudnn_rnn_py", + srcs = [ + "__init__.py", + "python/layers/cudnn_rnn.py", + ], + dso = [ + ":python/ops/_cudnn_rnn_ops.so", + ], + kernels = [ + ":cudnn_rnn_kernels", + ":cudnn_rnn_ops_op_lib", + ], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + ":cudnn_rnn_ops", + ":cudnn_rnn_ops_py", + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:platform", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + ], +) + cuda_py_test( name = "cudnn_rnn_ops_test", size = "large", srcs = ["python/kernel_tests/cudnn_rnn_ops_test.py"], + additional_deps = [ + ":cudnn_rnn_ops_py", + "//tensorflow/core:protos_all_py", + "//tensorflow/contrib/rnn:rnn_py", + "//tensorflow/python/ops/losses:losses", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + 
"//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:random_ops", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + "//tensorflow/python:variables", + ], + shard_count = 6, + tags = [ + "manual", + "requires_cudnn5", + ], +) + +cuda_py_test( + name = "cudnn_rnn_test", + size = "large", + srcs = ["python/kernel_tests/cudnn_rnn_test.py"], additional_deps = [ ":cudnn_rnn_py", "//tensorflow/core:protos_all_py", @@ -114,7 +171,7 @@ cuda_py_test( size = "large", srcs = ["python/kernel_tests/cudnn_rnn_ops_benchmark.py"], additional_deps = [ - ":cudnn_rnn_py", + ":cudnn_rnn_ops_py", "//tensorflow/contrib/rnn:rnn_py", "//tensorflow/python:array_ops", "//tensorflow/python:client", diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py new file mode 100644 index 0000000000..9e627bcaf4 --- /dev/null +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py @@ -0,0 +1,1050 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Cudnn RNN models.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools +import os +import unittest + +import numpy as np + +from tensorflow.contrib.cudnn_rnn.python.layers import cudnn_rnn +from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops +from tensorflow.contrib.rnn.python.ops import rnn as contrib_rnn_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.framework.test_util import TensorFlowTestCase +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_nn_ops +from tensorflow.python.ops import gradients_impl as gradients +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import rnn as rnn_lib +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.ops import variables +from tensorflow.python.ops.losses import losses +from tensorflow.python.platform import googletest +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import gradient_descent +from tensorflow.python.training import saver as saver_lib + +CUDNN_LSTM = cudnn_rnn_ops.CUDNN_LSTM +CUDNN_GRU = cudnn_rnn_ops.CUDNN_GRU +CUDNN_RNN_RELU = cudnn_rnn_ops.CUDNN_RNN_RELU +CUDNN_RNN_TANH = cudnn_rnn_ops.CUDNN_RNN_TANH +CUDNN_RNN_UNIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION +CUDNN_RNN_BIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION + +CUDNN_LSTM_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_LSTM_PARAMS_PER_LAYER 
+CUDNN_GRU_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_GRU_PARAMS_PER_LAYER +CUDNN_RNN_TANH_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_RNN_TANH_PARAMS_PER_LAYER +CUDNN_RNN_RELU_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_RNN_RELU_PARAMS_PER_LAYER + + +class CudnnTestModel(object): + """Model with convenient APIs for easier building and running test graph. + + The graph built is used by all tests below to avoid repeatedly building + similar test graphs. + """ + + def __init__(self, + rnn_mode, + num_layers, + num_units, + input_size, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0., + dtype=dtypes.float32, + training=False, + kernel_initializer=None, + bias_initializer=None): + if dtype not in (dtypes.float32, dtypes.float64): + raise ValueError("Invalid dtype: %s" % dtype) + self._dtype = dtype + + self._inputs = array_ops.placeholder( + dtype=dtype, shape=[None, None, input_size], name="inputs") + h = array_ops.placeholder( + dtype=dtype, shape=[None, None, num_units], name="h") + c = array_ops.placeholder( + dtype=dtype, shape=[None, None, num_units], name="c") + if rnn_mode == CUDNN_LSTM: + model_fn = cudnn_rnn.CudnnLSTM + self._initial_state = (h, c) + elif rnn_mode == CUDNN_GRU: + model_fn = cudnn_rnn.CudnnGRU + self._initial_state = (h,) + elif rnn_mode == CUDNN_RNN_TANH: + model_fn = cudnn_rnn.CudnnRNNTanh + self._initial_state = (h,) + elif rnn_mode == CUDNN_RNN_RELU: + model_fn = cudnn_rnn.CudnnRNNRelu + self._initial_state = (h,) + else: + raise ValueError("Invalid rnn_mode: %s" % rnn_mode) + self._rnn = model_fn( + num_layers, + num_units, + direction=direction, + dropout=dropout, + dtype=dtype, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer) + self._rnn.build([None, None, input_size]) + + self._outputs, self._output_state = self._rnn( + self._inputs, initial_state=self._initial_state, training=training) + + def _AddUp(self, outputs, output_state): + total = math_ops.reduce_sum(outputs) + for s in output_state: + total += 
math_ops.reduce_sum(s) + return total + + @property + def inputs(self): + return self._inputs + + @property + def initial_state(self): + return self._initial_state + + @property + def outputs(self): + return self._outputs + + @property + def output_state(self): + return self._output_state + + @property + def rnn(self): + return self._rnn + + @property + def total_sum(self): + return self._AddUp(self.outputs, self.output_state) + + def SynthesizeInput(self, seq_length, batch_size, seed=1234): + """Synthesizes input and initial state values for testing.""" + np.random.seed(seed) + num_layers = self._rnn.num_layers + dir_count = self._rnn.num_dirs + num_units = self._rnn.num_units + input_size = self._rnn.input_size + + np_dtype = np.float32 if self._dtype == dtypes.float32 else np.float64 + inputs = np.random.randn(seq_length, batch_size, + input_size).astype(np_dtype) + input_h = np.random.randn(num_layers * dir_count, batch_size, + num_units).astype(np_dtype) + if self._rnn.rnn_mode == CUDNN_LSTM: + input_c = np.random.randn(num_layers * dir_count, batch_size, + num_units).astype(np_dtype) + initial_state = (input_h, input_c) + else: + initial_state = (input_h,) + return inputs, initial_state + + def ZeroState(self, batch_size): + num_layers = self._rnn.num_layers + dir_count = self._rnn.num_dirs + num_units = self._rnn.num_units + + np_dtype = np.float32 if self._dtype == dtypes.float32 else np.float64 + input_h = np.zeros((num_layers * dir_count, batch_size, + num_units)).astype(np_dtype) + if self._rnn.rnn_mode == CUDNN_LSTM: + input_c = np.zeros((num_layers * dir_count, batch_size, + num_units)).astype(np_dtype) + initial_state = (input_h, input_c) + else: + initial_state = (input_h,) + return initial_state + + def FProp(self, inputs_t, initial_state_t, training): + """Builds additional subgraph with given inputs and state. + + Args: + inputs_t: a tensor. + initial_state_t: a tensor. + training: boolean, true if training mode. 
+ Returns: + A tensor of the forward pass output of the model. + """ + outputs, output_state = self._rnn( + inputs_t, initial_state=initial_state_t, training=training) + return self._AddUp(outputs, output_state) + + def Feed(self, sess, inputs, initial_state=None, return_sum=True): + """Runs graph with given inputs and initial state.""" + batch_size = inputs.shape[1] + if initial_state is None: + initial_state = self.ZeroState(batch_size) + if return_sum: + return sess.run( + self.total_sum, + feed_dict={self.inputs: inputs, + self.initial_state: initial_state}) + else: + return sess.run( + [self.outputs, self.output_state], + feed_dict={self.inputs: inputs, + self.initial_state: initial_state}) + + +def _CreateCudnnCompatibleCanonicalRNN(rnn, inputs, is_bidi=False, scope=None): + mode = rnn.rnn_mode + num_units = rnn.num_units + num_layers = rnn.num_layers + + # To reuse cuDNN-trained models, must use cudnn compatible rnn cells. + if mode == CUDNN_LSTM: + single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleLSTMCell(num_units) + elif mode == CUDNN_GRU: + single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleGRUCell(num_units) + elif mode == CUDNN_RNN_TANH: + single_cell = (lambda: rnn_cell_impl.BasicRNNCell(num_units, math_ops.tanh)) + elif mode == CUDNN_RNN_RELU: + single_cell = ( + lambda: rnn_cell_impl.BasicRNNCell(num_units, gen_nn_ops.relu)) + else: + raise ValueError("%s is not supported!" 
% mode) + + if not is_bidi: + cell = rnn_cell_impl.MultiRNNCell( + [single_cell() for _ in range(num_layers)]) + return rnn_lib.dynamic_rnn( + cell, inputs, dtype=dtypes.float32, time_major=True, scope=scope) + else: + cells_fw = [single_cell() for _ in range(num_layers)] + cells_bw = [single_cell() for _ in range(num_layers)] + + (outputs, output_state_fw, + output_state_bw) = contrib_rnn_lib.stack_bidirectional_dynamic_rnn( + cells_fw, + cells_bw, + inputs, + dtype=dtypes.float32, + time_major=True, + scope=scope) + return outputs, (output_state_fw, output_state_bw) + + +class CudnnRNNTestBasic(TensorFlowTestCase): + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testLayerBasic(self): + num_layers = 4 + num_units = 2 + batch_size = 8 + direction = CUDNN_RNN_UNIDIRECTION + dir_count = 1 + + with vs.variable_scope("main"): + kernel_initializer = init_ops.constant_initializer(0.) + bias_initializer = init_ops.constant_initializer(0.) 
+ inputs = random_ops.random_uniform([ + num_layers * dir_count, batch_size, num_units], dtype=dtypes.float32) + + lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, + direction=direction, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + name="awesome_lstm") + + # Build the layer + outputs1, _ = lstm(inputs) + # Reuse the layer + outputs2, _ = lstm(inputs) + + total_sum1 = math_ops.reduce_sum(outputs1) + total_sum2 = math_ops.reduce_sum(outputs2) + + with vs.variable_scope("main", reuse=True): + lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, + direction=direction, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + name="awesome_lstm") + + # Reuse the layer + outputs3, _ = lstm(inputs) + total_sum3 = math_ops.reduce_sum(outputs3) + + self.assertEqual(1, len(variables.trainable_variables())) + self.assertEqual(1, len(ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS))) + self.assertEqual("main/awesome_lstm/opaque_kernel", + variables.trainable_variables()[0].op.name) + + with self.test_session(use_gpu=True) as sess: + sess.run(variables.global_variables_initializer()) + (total_sum1_v, total_sum2_v, total_sum3_v) = sess.run( + [total_sum1, total_sum2, total_sum3]) + self.assertEqual(0, total_sum1_v) + self.assertEqual(0, total_sum2_v) + self.assertEqual(0, total_sum3_v) + + +# TODO(jamesqin): Transform to parameterized test after it is included in the +# TF open source codebase. 
+class CudnnRNNTestSaveRestore(TensorFlowTestCase): + + def _CompareWeights(self, lhs, rhs): + self.assertEqual(len(lhs), len(rhs)) + for lw, rw in zip(lhs, rhs): + self.assertAllEqual(lw, rw) + + def _CompareBiases(self, lhs, rhs, rnn_mode, num_layers, direction): + self.assertEqual(len(lhs), len(rhs)) + if rnn_mode == CUDNN_LSTM: + num_params_per_layer = CUDNN_LSTM_PARAMS_PER_LAYER + elif rnn_mode == CUDNN_GRU: + num_params_per_layer = CUDNN_GRU_PARAMS_PER_LAYER + elif rnn_mode == CUDNN_RNN_TANH: + num_params_per_layer = CUDNN_RNN_TANH_PARAMS_PER_LAYER + else: + num_params_per_layer = CUDNN_RNN_RELU_PARAMS_PER_LAYER + num_dirs = 1 if direction == CUDNN_RNN_UNIDIRECTION else 2 + num_params_per_layer *= num_dirs + self.assertEqual(num_params_per_layer * num_layers, len(lhs)) + + for i in range(num_layers): + layer_lhs = lhs[i * num_params_per_layer: (i+1) * num_params_per_layer] + layer_rhs = rhs[i * num_params_per_layer: (i+1) * num_params_per_layer] + if direction == CUDNN_RNN_UNIDIRECTION: + self._CompareSingleLayerBiases(layer_lhs, layer_rhs) + else: + size = len(layer_lhs) + fw_lhs, bw_lhs = layer_lhs[:size//2], layer_lhs[size//2:] + fw_rhs, bw_rhs = layer_rhs[:size//2], layer_rhs[size//2:] + self._CompareSingleLayerBiases(fw_lhs, fw_rhs) + self._CompareSingleLayerBiases(bw_lhs, bw_rhs) + + def _CompareSingleLayerBiases(self, lhs, rhs): + self.assertEqual(len(lhs), len(rhs)) + + lf_lhs, rt_lhs = lhs[:len(lhs)//2], lhs[len(lhs)//2:] + lf_rhs, rt_rhs = rhs[:len(rhs)//2], rhs[len(rhs)//2:] + self.assertEqual(len(lf_lhs), len(rt_lhs)) + self.assertEqual(len(lf_rhs), len(rt_rhs)) + + sum_lhs, sum_rhs = [], [] + for lf, rt in zip(lf_lhs, rt_lhs): + sum_lhs.append(lf + rt) + for lf, rt in zip(lf_rhs, rt_rhs): + sum_rhs.append(lf + rt) + self.assertEqual(len(sum_lhs), len(sum_rhs)) + for lf, rt in zip(sum_lhs, sum_rhs): + self.assertAllEqual(lf, rt) + + def _TestSaveRestoreVariable(self, rnn_mode, direction, dtype): + input_size = 3 + num_layers = 2 + num_units = 7 + 
with ops.Graph().as_default() as g: + random_seed.set_random_seed(1234) + model = CudnnTestModel( + rnn_mode, + num_layers, + num_units, + input_size, + direction=direction, + dtype=dtype) + rnn = model.rnn + save_path = os.path.join(self.get_temp_dir(), + "save-restore-variable-test") + saver = saver_lib.Saver() + weights, biases = model.rnn.saveable._OpaqueParamsToCanonical() + opaque_params = rnn.trainable_variables[0] + # CudnnTestModel() creates CudnnOpaqueParamsSaveable that helps saver save + # Cudnn vars in canonical format. + reset_op = state_ops.assign( + opaque_params, + array_ops.zeros(array_ops.shape(opaque_params), dtype=dtype)) + # Passing graph explictly, otherwise an old sess would be reused. + with self.test_session(use_gpu=True, graph=g) as sess: + sess.run(variables.global_variables_initializer()) + val = saver.save(sess, save_path) + self.assertEqual(save_path, val) + weights_v, biases_v = sess.run([weights, biases]) + + # Reset opaque param + sess.run(reset_op) + saver.restore(sess, save_path) + weights_v_restored, biases_v_restored = sess.run([weights, biases]) + + self._CompareWeights(weights_v, weights_v_restored) + self._CompareBiases(biases_v, biases_v_restored, rnn_mode, num_layers, + direction) + + def _TestSaveRestoreTwoVariables(self, rnn_mode, direction, dtype): + input_size = 3 + num_layers = 2 + num_units = 7 + with ops.Graph().as_default() as g: + random_seed.set_random_seed(1234) + with vs.variable_scope("m1"): + model1 = CudnnTestModel( + rnn_mode, + num_layers, + num_units, + input_size, + direction=direction, + dtype=dtype) + with vs.variable_scope("m2"): + model2 = CudnnTestModel( + rnn_mode, + num_layers, + num_units, + input_size, + direction=direction, + dtype=dtype) + opaque_params = (model1.rnn.trainable_variables[0], + model2.rnn.trainable_variables[0]) + weights1, biases1 = model1.rnn.saveable._OpaqueParamsToCanonical() + weights2, biases2 = model2.rnn.saveable._OpaqueParamsToCanonical() + reset_params = [ + 
state_ops.assign(params, + array_ops.zeros_like(params, dtype=dtype)) + for params in opaque_params + ] + reset_op = control_flow_ops.group(*reset_params) + save_path = os.path.join(self.get_temp_dir(), + "save-restore-variable-test2") + saver = saver_lib.Saver() + # Passing graph explicitly, otherwise an old sess would be reused. + with self.test_session(use_gpu=True, graph=g) as sess: + sess.run(variables.global_variables_initializer()) + val = saver.save(sess, save_path) + self.assertEqual(save_path, val) + + weights1_v, biases1_v = sess.run([weights1, biases1]) + weights2_v, biases2_v = sess.run([weights2, biases2]) + + sess.run(reset_op) + saver.restore(sess, save_path) + weights1_v_restored, biases1_v_restored = sess.run([weights1, biases1]) + weights2_v_restored, biases2_v_restored = sess.run([weights2, biases2]) + + self._CompareWeights(weights1_v, weights1_v_restored) + self._CompareWeights(weights2_v, weights2_v_restored) + self._CompareBiases(biases1_v, biases1_v_restored, rnn_mode, num_layers, + direction) + self._CompareBiases(biases2_v, biases2_v_restored, rnn_mode, num_layers, + direction) + + def _TestSaveRestoreOutput(self, rnn_mode, direction, dtype): + with ops.Graph().as_default() as g: + num_layers = 2 + num_units = 7 + input_size = 7 + seq_length = 8 + batch_size = 4 + model = CudnnTestModel( + rnn_mode, + num_layers, + num_units, + input_size, + direction=direction, + dtype=dtype, + training=False) + rnn = model.rnn + + save_path = os.path.join(self.get_temp_dir(), "save-restore-output-test") + saver = saver_lib.Saver() + + # Only one opaque var in a cudnn layer. + assert len(rnn.trainable_variables) == 1 + reset_params = state_ops.assign( + rnn.trainable_variables[0], + array_ops.zeros( + array_ops.shape(rnn.trainable_variables[0]), dtype=dtype)) + + # Passing graph explicitly, otherwise an old sess would be reused. 
+ with self.test_session(use_gpu=True, graph=g) as sess: + sess.run(variables.global_variables_initializer()) + inputs, initial_state = model.SynthesizeInput(seq_length, batch_size) + total_sum_v = model.Feed(sess, inputs, initial_state) + val = saver.save(sess, save_path) + self.assertEqual(save_path, val) + + sess.run(reset_params) + saver.restore(sess, save_path) + total_sum_v_restored = model.Feed(sess, inputs, initial_state) + self.assertAllClose(total_sum_v, total_sum_v_restored, atol=1e-5) + + def _TestSaveRestoreHelper(self, rnn_mode): + directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION] + dtype_list = [dtypes.float32, dtypes.float64] + for direction, dtype in itertools.product(directions, dtype_list): + self._TestSaveRestoreVariable(rnn_mode, direction, dtype) + self._TestSaveRestoreTwoVariables(rnn_mode, direction, dtype) + self._TestSaveRestoreOutput(rnn_mode, direction, dtype) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSaveRestoreRepeatedlyCreateCustomSaveable(self): + input_size = 3 + num_layers = 2 + num_units = 7 + with ops.Graph().as_default(): + random_seed.set_random_seed(1234) + model = CudnnTestModel( + CUDNN_LSTM, + num_layers, + num_units, + input_size, + direction=CUDNN_RNN_UNIDIRECTION, + dtype=dtypes.float32) + with self.assertRaisesRegexp(RuntimeError, + "Cudnn saveable already created"): + model.rnn._create_saveable() + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSaveRestoreLSTM(self): + self._TestSaveRestoreHelper(CUDNN_LSTM) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSaveRestoreGRU(self): + self._TestSaveRestoreHelper(CUDNN_GRU) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSaveRestoreRNNTanh(self): + self._TestSaveRestoreHelper(CUDNN_RNN_TANH) + + 
@unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSaveRestoreRNNRelu(self): + self._TestSaveRestoreHelper(CUDNN_RNN_RELU) + + +# TODO(jamesqin): Transform to parameterized test after it is included in the +# TF open source codebase. +class CudnnRNNTestCompatibleRNNCells(TensorFlowTestCase): + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testCudnnCompatibleLSTM(self): + self._TestCudnnCompatibleRnnCellsHelper(CUDNN_LSTM) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testCudnnCompatibleGRU(self): + self._TestCudnnCompatibleRnnCellsHelper(CUDNN_GRU) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testCudnnCompatibleRNNTanh(self): + self._TestCudnnCompatibleRnnCellsHelper(CUDNN_RNN_TANH) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testCudnnCompatibleRNNRelu(self): + self._TestCudnnCompatibleRnnCellsHelper(CUDNN_RNN_RELU) + + def _TestCudnnCompatibleRnnCellsHelper(self, rnn_mode): + configs = [ + { + "num_layers": 1, + "seq_length": 3, + "num_units": 4, + "input_size": 5, + "batch_size": 6, + }, + { + "num_layers": 2, + "seq_length": 8, + "num_units": 4, + "input_size": 8, + "batch_size": 16, + }, + { + "num_layers": 2, + "seq_length": 3, + "num_units": 4, + "input_size": 5, + "batch_size": 6, + }, + { + "num_layers": 1, + "seq_length": 2, + "num_units": 2, + "input_size": 4, + "batch_size": 1, + }, + ] + directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION] + for cfg, direction in itertools.product(configs, directions): + self._TestCudnnCompatibleRnnCells(cfg["num_layers"], cfg["seq_length"], + cfg["num_units"], cfg["input_size"], + cfg["batch_size"], rnn_mode, direction) + + def _TestCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units, + input_size, batch_size, rnn_mode, 
direction): + dtype = dtypes.float32 + # Train graph + with ops.Graph().as_default() as g: + model = CudnnTestModel( + rnn_mode, + num_layers, + num_units, + input_size, + direction=direction, + dtype=dtype, + training=True) + target_output = array_ops.placeholder(dtype=dtype) + loss_op = losses.log_loss( + labels=target_output, predictions=model.total_sum) + optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1e-2) + train_op = optimizer.minimize(loss_op) + + saver = saver_lib.Saver() + + # Train Cudnn model + seed = 0 + with self.test_session(use_gpu=True, graph=g) as sess: + sess.run(variables.global_variables_initializer()) + # Train 128 steps + num_steps = 128 + for _ in range(num_steps): + inputs, _ = model.SynthesizeInput(seq_length, batch_size, seed) + targets = np.random.rand() + sess.run( + train_op, + feed_dict={ + model.inputs: inputs, + model.initial_state: model.ZeroState(batch_size), + target_output: targets + }) + seed += 1 + + save_path = os.path.join(self.get_temp_dir(), + ("cudnn-rnn-%s-test" % rnn_mode)) + save_v = saver.save(sess, save_path) + self.assertEqual(save_path, save_v) + + # Cudnn inference graph + with ops.Graph().as_default() as g: + model = CudnnTestModel( + rnn_mode, + num_layers, + num_units, + input_size, + direction=direction, + dtype=dtype, + training=False) + rnn = model.rnn + saver = saver_lib.Saver() + + inference_input = np.random.rand(seq_length, batch_size, + input_size).astype(np.float32) + with self.test_session(use_gpu=True, graph=g) as sess: + sess.run(variables.global_variables_initializer()) + saver.restore(sess, save_path) + + # Cudnn inference + cudnn_outputs_v, cudnn_output_states_v = model.Feed( + sess, inference_input, return_sum=False) + + # Canonical RNN inference graph + with ops.Graph().as_default() as g: + cell_inputs = array_ops.placeholder( + dtype, shape=[seq_length, batch_size, input_size]) + if direction == CUDNN_RNN_UNIDIRECTION: + # outputs is one tensor, states are num_layer 
tuples, each 2 tensors + (outputs, states) = _CreateCudnnCompatibleCanonicalRNN(rnn, cell_inputs) + if rnn_mode == CUDNN_LSTM: + output_h = array_ops.stack([s.h for s in states]) + output_c = array_ops.stack([s.c for s in states]) + else: + output_state = array_ops.stack([s for s in states]) + else: + # outputs is one tensor. + # states is a tuple of 2 tuples: + # each sub tuple is num_layer tuples, each with 2 tensors. + (outputs, states) = _CreateCudnnCompatibleCanonicalRNN( + rnn, cell_inputs, is_bidi=True) + output_state_fw, output_state_bw = states + if rnn_mode == CUDNN_LSTM: + output_h, output_c = [], [] + for s_fw, s_bw in zip(output_state_fw, output_state_bw): + output_h.append(array_ops.stack([s_fw.h, s_bw.h])) + output_c.append(array_ops.stack([s_fw.c, s_bw.c])) + output_h = array_ops.concat(output_h, axis=0) + output_c = array_ops.concat(output_c, axis=0) + else: + output_state = [] + for s_fw, s_bw in zip(output_state_fw, output_state_bw): + output_state.append(array_ops.stack([s_fw, s_bw])) + output_state = array_ops.concat(output_state, axis=0) + saver = saver_lib.Saver() + + with self.test_session(use_gpu=True, graph=g) as sess: + saver.restore(sess, save_path) + + # BlockCell inference + if rnn_mode == CUDNN_LSTM: + outputs_v, output_h_v, output_c_v = sess.run( + [outputs, output_h, output_c], + feed_dict={cell_inputs: inference_input}) + self.assertAllClose(cudnn_outputs_v, outputs_v) + cudnn_output_h_v, cudnn_output_c_v = cudnn_output_states_v + self.assertAllClose(cudnn_output_h_v, output_h_v) + self.assertAllClose(cudnn_output_c_v, output_c_v) + else: + outputs_v, output_state_v = sess.run( + [outputs, output_state], + feed_dict={cell_inputs: inference_input}) + self.assertAllClose(cudnn_outputs_v, outputs_v, atol=1e-5, rtol=1e-5) + (cudnn_output_h_v,) = cudnn_output_states_v + self.assertAllClose(cudnn_output_h_v, output_state_v, atol=1e-5, + rtol=1e-5) + + +class CudnnRNNTestParamsSize(TensorFlowTestCase): + + def _TestOpaqueParamsSize(self, 
rnn_mode, num_layers, num_units, input_size, + direction): + logging.info("Testing one lstm param size with config: %s", locals()) + dtype = dtypes.float32 + + model = CudnnTestModel( + rnn_mode, + num_layers, + num_units, + input_size, + dtype=dtype, + direction=direction) + rnn = model.rnn + + # Min param size estimate = sum(weights.size) + sum(biases.size) + min_params_size = ( + np.sum([np.prod(sp) for sp in rnn.canonical_weight_shapes]) + + np.sum([sp[0] for sp in rnn.canonical_bias_shapes])) + + opaque_params = rnn.trainable_variables[0] + with self.test_session(use_gpu=True, graph=ops.get_default_graph()): + variables.global_variables_initializer().run() + opaque_params_size_v = opaque_params.eval().size + self.assertLessEqual(min_params_size, opaque_params_size_v) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testOpaqueParamsSize(self): + test_configs = [ + [4, 200, 200], + [4, 200, 300], + [4, 200, 100], + [1, 100, 200], + [2, 200, 100], + [3, 200, 400], + ] + directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION] + rnns = [CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_RELU, CUDNN_RNN_TANH] + for (rnn, config, direction) in itertools.product(rnns, test_configs, + directions): + num_layers, num_units, input_size = config + with ops.Graph().as_default(): + self._TestOpaqueParamsSize(rnn, num_layers, num_units, input_size, + direction) + + +class CudnnRNNTestTraining(TensorFlowTestCase): + + def _ComputeNumericGrad(self, sess, y, x, delta=1e-4, step=1): + """Compute the numeric gradient of y wrt x. + + Args: + sess: The TF session constructed with a graph containing x and y. + y: A scalar TF Tensor in the graph constructed in sess. + x: A TF Tensor in the graph constructed in sess. + delta: Gradient checker's small perturbation of x[i]. + step: Only compute numerical gradients for a subset of x values. + I.e. dy/dx[i] is computed if i % step == 0. + Returns: + A Tensor of the same shape and dtype as x. 
If x[i] is not chosen + to compute the numerical gradient dy/x[i], the corresponding + value is set to 0. + """ + + x_data = sess.run(x) + x_size = x_data.size + x_shape = x_data.shape + + numeric_grad = np.zeros(x_size, dtype=x_data.dtype) + + for i in range(0, x_size, step): + x_pos = x_data.copy() + if x_size == 1: + x_pos += delta + else: + x_pos.flat[i] += delta + y_pos_feed_dict = dict([(x.name, x_pos)]) + y_pos = sess.run(y, feed_dict=y_pos_feed_dict) + + x_neg = x_data.copy() + if x_size == 1: + x_neg -= delta + else: + x_neg.flat[i] -= delta + y_neg_feed_dict = dict([(x.name, x_neg)]) + y_neg = sess.run(y, feed_dict=y_neg_feed_dict) + numeric_grad[i] = (y_pos - y_neg) / (2 * delta) + return numeric_grad.reshape(x_shape) + + def _GradientCheck(self, sess, y, xs, tolerance=1e-6, delta=1e-4): + sym_grads_t = gradients.gradients(y, xs) + sym_grads = sess.run(sym_grads_t) + + num_grads = [self._ComputeNumericGrad(sess, y, x, delta) for x in xs] + self.assertEqual(len(sym_grads), len(num_grads)) + for sym, num in zip(sym_grads, num_grads): + self.assertFalse(np.any(np.isnan(sym))) + self.assertFalse(np.any(np.isnan(num))) + self.assertAllClose(sym, num, atol=tolerance, rtol=tolerance) + + def _TestOneSimpleTraining(self, rnn_mode, num_layers, num_units, input_size, + batch_size, seq_length, dir_count, dropout, dtype, + delta, tolerance): + # Gradient checking runs two forward ops with almost the same input. Need to + # make sure the drop patterns across the two runs are the same. 
+ logging.info("Training test with config: %s", locals()) + old_env_state = os.environ.get("TF_CUDNN_RESET_RND_GEN_STATE", str(False)) + os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = str(True) + random_seed.set_random_seed(5678) + has_input_c = (rnn_mode == CUDNN_LSTM) + direction = (CUDNN_RNN_UNIDIRECTION + if dir_count == 1 else CUDNN_RNN_BIDIRECTION) + model = CudnnTestModel( + rnn_mode, + num_layers, + num_units, + input_size, + direction=direction, + dropout=dropout, + dtype=dtype, + training=True, + bias_initializer=init_ops.random_normal_initializer( + mean=1., dtype=dtype)) + rnn = model.rnn + params = rnn.trainable_variables[0] + + inputs = variables.Variable( + random_ops.random_uniform( + [seq_length, batch_size, input_size], dtype=dtype), + dtype=dtype) + input_h = variables.Variable( + random_ops.random_uniform( + [num_layers * dir_count, batch_size, num_units], dtype=dtype), + dtype=dtype) + if has_input_c: + input_c = variables.Variable( + random_ops.random_uniform( + [num_layers * dir_count, batch_size, num_units], dtype=dtype), + dtype=dtype) + initial_state = (input_h, input_c) + else: + initial_state = (input_h,) + total_sum = model.FProp(inputs, initial_state, training=True) + + with self.test_session(use_gpu=True, graph=ops.get_default_graph()) as sess: + sess.run(variables.global_variables_initializer()) + all_inputs = [inputs, params] + for s in initial_state: + all_inputs.append(s) + self._GradientCheck( + sess, total_sum, all_inputs, tolerance=tolerance, delta=delta) + os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = old_env_state + + def _TestSimpleTrainingHelper(self, rnn_mode, test_configs): + dropouts = [0., 0.5, 1.] 
+ for config, dropout in itertools.product(test_configs, dropouts): + dtype = config.get("dtype", dtypes.float32) + delta = config.get("delta", 1e-4) + tolerance = config.get("tolerance", 1e-6) + dir_count = config.get("dir_count", 1) + shape = config["shape"] + with ops.Graph().as_default(): + self._TestOneSimpleTraining(rnn_mode, shape["num_layers"], + shape["num_units"], shape["input_size"], + shape["batch_size"], shape["seq_length"], + dir_count, dropout, dtype, delta, tolerance) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSimpleTrainingLSTM64(self): + test_configs = [ + { + "dtype": dtypes.float64, + "tolerance": 5e-6, + "shape": { + "num_layers": 2, + "num_units": 3, + "input_size": 4, + "batch_size": 3, + "seq_length": 4, + }, + }, + ] + self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSimpleTrainingLSTM32(self): + test_configs = [ + { + "dtype": dtypes.float32, + "delta": 1e-4, + "tolerance": 9e-2, + "shape": { + "num_layers": 2, + "num_units": 3, + "input_size": 4, + "batch_size": 3, + "seq_length": 4, + }, + }, + ] + self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSimpleTrainingGRU64(self): + test_configs = [ + { + "dtype": dtypes.float64, + "tolerance": 5e-6, + "shape": { + "num_layers": 2, + "num_units": 3, + "input_size": 4, + "batch_size": 3, + "seq_length": 4, + } + }, + ] + self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSimpleTrainingGRU32(self): + test_configs = [ + { + "dtype": dtypes.float32, + "delta": 1e-3, + "tolerance": 4e-3, + "shape": { + "num_layers": 2, + "num_units": 3, + "input_size": 4, + "batch_size": 3, + "seq_length": 4, + 
}, + }, + ] + self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSimpleTrainingRNNTanh64(self): + test_configs = [ + { + "dtype": dtypes.float64, + "tolerance": 5e-6, + "shape": { + "num_layers": 2, + "num_units": 3, + "input_size": 4, + "batch_size": 3, + "seq_length": 4, + }, + }, + ] + self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSimpleTrainingRNNTanh32(self): + test_configs = [ + { + "dtype": dtypes.float32, + "delta": 1e-3, + "tolerance": 5e-3, + "shape": { + "num_layers": 2, + "num_units": 3, + "input_size": 4, + "batch_size": 3, + "seq_length": 4, + }, + }, + ] + self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSimpleTrainingRNNRelu64(self): + test_configs = [ + { + "dtype": dtypes.float64, + "tolerance": 5e-6, + "shape": { + "num_layers": 2, + "num_units": 3, + "input_size": 4, + "batch_size": 3, + "seq_length": 4, + }, + }, + ] + self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs) + + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testSimpleTrainingRNNRelu32(self): + test_configs = [ + { + "dtype": dtypes.float32, + "delta": 1e-3, + "tolerance": 7e-2, + "shape": { + "num_layers": 2, + "num_units": 3, + "input_size": 4, + "batch_size": 3, + "seq_length": 4, + }, + }, + ] + self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py new file mode 100644 index 0000000000..810fb6450c --- /dev/null +++ b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py @@ -0,0 
+1,552 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Cudnn RNN operators.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops +from tensorflow.contrib.util import loader +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.layers import base as base_layer +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.platform import resource_loader +from tensorflow.python.platform import tf_logging as logging + +_cudnn_rnn_ops_so = loader.load_op_library( + resource_loader.get_path_to_datafile("_cudnn_rnn_ops.so")) + +CUDNN_RNN_UNIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION +CUDNN_RNN_BIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION +CUDNN_LSTM = cudnn_rnn_ops.CUDNN_LSTM +CUDNN_GRU = cudnn_rnn_ops.CUDNN_GRU +CUDNN_RNN_RELU = cudnn_rnn_ops.CUDNN_RNN_RELU +CUDNN_RNN_TANH = cudnn_rnn_ops.CUDNN_RNN_TANH + +# Half for cell input, half for hidden states. 
+CUDNN_LSTM_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_LSTM_PARAMS_PER_LAYER +CUDNN_GRU_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_GRU_PARAMS_PER_LAYER +CUDNN_RNN_TANH_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_RNN_TANH_PARAMS_PER_LAYER +CUDNN_RNN_RELU_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_RNN_RELU_PARAMS_PER_LAYER + +CUDNN_INPUT_LINEAR_MODE = cudnn_rnn_ops.CUDNN_INPUT_LINEAR_MODE +CUDNN_INPUT_SKIP_MODE = cudnn_rnn_ops.CUDNN_INPUT_SKIP_MODE +CUDNN_INPUT_AUTO_MODE = cudnn_rnn_ops.CUDNN_INPUT_AUTO_MODE + + +class _CudnnRNN(base_layer.Layer): + # pylint:disable=line-too-long + """Abstract class for RNN layers with Cudnn implementation. + + Cudnn RNNs have two major differences from other platform-independent RNNs tf + provides: + * Cudnn LSTM and GRU are mathematically different from their tf counterparts. + (e.g. @{tf.contrib.rnn.LSTMBlockCell} and @{tf.nn.rnn_cell.GRUCell}.) + * Cudnn-trained checkpoints are not directly compatible with tf RNNs: + * They use a single opaque parameter buffer for the entire (possibly) + multi-layer multi-directional RNN; Whereas tf RNN weights are per-cell and + layer. + * The size and layout of the parameter buffers may change between + CUDA/CuDNN/GPU generations. Because of that, the opaque parameter variable + does not have a static shape and is not partitionable. Instead of using + partitioning to alleviate the PS's traffic load, try building a + multi-tower model and do gradient aggregation locally within the host + before updating the PS. See https://www.tensorflow.org/performance/performance_models#parameter_server_variables + for a detailed performance guide. + + Consequently, if one plans to use Cudnn trained models on both GPU and CPU + for inference and training, one needs to: + * Create a CudnnOpaqueParamsSaveable subclass object to save RNN params in + canonical format. (This is done for you automatically during layer building + process.) + * When not using a Cudnn RNN class, use CudnnCompatibleRNN classes to load the + checkpoints. 
These classes are platform-independent and perform the same + computation as Cudnn for training and inference. + Similarly, CudnnCompatibleRNN-trained checkpoints can be loaded by CudnnRNN + classes seamlessly. + + Below is a typical workflow (using LSTM as an example): + train with Cudnn, then restore the checkpoint into Cudnn-compatible cells. + + # Use Cudnn-trained checkpoints with CudnnCompatibleRNNs + ```python + with tf.Graph().as_default(): + lstm = CudnnLSTM(num_layers, num_units, direction, ...) + + outputs, output_states = lstm(inputs, initial_states, training=True) + + # If user plans to delay calling the cell with inputs, one can do + # lstm.build(input_shape) + + saver = Saver() + + # training subgraph + ... + + # Once in a while save the model. + saver.save(save_path) + + # Inference subgraph for unidirectional RNN on, e.g., CPU or mobile. + with tf.Graph().as_default(): + single_cell = lambda: tf.contrib.cudnn_rnn.CudnnCompatibleLSTM(num_units) + + # NOTE: Even if there's only one layer, the cell needs to be wrapped in + # MultiRNNCell. + cell = tf.nn.rnn_cell.MultiRNNCell( + [single_cell() for _ in range(num_layers)]) + + # Leave the scope arg unset. + outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, initial_state, ...) + + saver = Saver() + + # Create session + sess = ... + + # Restores + saver.restore(sess, save_path) + + # Inference subgraph for bidirectional RNN + with tf.Graph().as_default(): + single_cell = lambda: tf.contrib.cudnn_rnn.CudnnCompatibleLSTM(num_units) + cells_fw = [single_cell() for _ in range(num_layers)] + cells_bw = [single_cell() for _ in range(num_layers)] + + # Leave the scope arg unset. + (outputs, output_state_fw, + output_state_bw) = tf.contrib.rnn.stack_bidirectional_dynamic_rnn( + cells_fw, cells_bw, inputs, ...) + saver = Saver() + + # Create session + sess = ... + + # Restores + saver.restore(sess, save_path) + ``` + """ + # pylint:enable=line-too-long + + # The following are constants defined by subclasses. + # Type of RNN cell. 
+ _rnn_mode = None + # Number of cell weights(or biases) per layer. + _num_params_per_layer = None + # Custom SaveableObject class for the CudnnRNN class. + _saveable_cls = None + + # TODO(jamesqin): support float16 CuDNN RNN + def __init__(self, + num_layers, + num_units, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0., + seed=None, + dtype=dtypes.float32, + kernel_initializer=None, + bias_initializer=None, + name=None): + """Creates a CudnnRNN model from model spec. + + Args: + num_layers: the number of layers for the RNN model. + num_units: the number of units within the RNN model. + input_mode: indicate whether there is a linear projection between the + input and the actual computation before the first layer. It can be + 'linear_input', 'skip_input' or 'auto_select'. + 'linear_input' (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior). + 'skip_input' is only allowed when input_size == num_units; + 'auto_select' implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + direction: the direction model that the model operates. Can be either + 'unidirectional' or 'bidirectional' + dropout: dropout rate, a number between [0, 1]. Dropout is applied on + inputs of each layer. When set to 0, dropout is disabled. + seed: the op seed used for initializing dropout. See @{tf.set_random_seed} + for behavior. + dtype: tf.float32 or tf.float64 + kernel_initializer: starting value to initialize the weight. + bias_initializer: starting value to initialize the bias + (default is all zeros). + name: VariableScope for the created subgraph; defaults to class name. + This only serves the default scope if later no scope is specified when + invoking __call__(). + + Raises: + ValueError: if direction is invalid. 
+ """ + super(_CudnnRNN, self).__init__(dtype=dtype, name=name) + cudnn_rnn_ops.check_direction(direction) + cudnn_rnn_ops.check_input_mode(input_mode) + + self._num_layers = num_layers + self._num_units = num_units + self._input_mode = input_mode + self._direction = direction + self._dropout = dropout + self._seed = seed + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + # Init input_size to None, which will be set after build(). + self._input_size = None + self._saveable = None + + @property + def num_layers(self): + return self._num_layers + + @property + def num_units(self): + return self._num_units + + @property + def input_mode(self): + """Input mode of first layer. + + Indicates whether there is a linear projection between the input and the + actual computation before the first layer. It can be + * 'linear_input': (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior) + * 'skip_input': 'skip_input' is only allowed when input_size == num_units. + * 'auto_select'. implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + + Returns: + 'linear_input', 'skip_input' or 'auto_select'. + """ + return self._input_mode + + @property + def input_size(self): + if not self._input_size: + raise ValueError( + "\'input_size\' is unknown since layer has not been built.") + return self._input_size + + @property + def rnn_mode(self): + """Type of RNN cell used. + + Returns: + `lstm`, `gru`, `rnn_relu` or `rnn_tanh`. 
+ """ + return self._rnn_mode + + @property + def direction(self): + """Returns `unidirectional` or `bidirectional`.""" + return self._direction + + @property + def num_dirs(self): + return 1 if self._direction == CUDNN_RNN_UNIDIRECTION else 2 + + @property + def saveable(self): + return self._saveable + + @property + def canonical_weight_shapes(self): + """Shapes of Cudnn canonical weight tensors.""" + if not self._input_size: + raise RuntimeError( + "%s.canonical_weight_shapes invoked before input shape is known" % + type(self).__name__) + + shapes = [] + for i in range(self._num_layers): + shapes.extend(self._canonical_weight_shape(i)) + return shapes + + @property + def canonical_bias_shapes(self): + """Shapes of Cudnn canonical bias tensors.""" + return self._canonical_bias_shape(0) * self._num_layers + + def _update_trainable_weights(self, getter, *args, **kwargs): + """Custom getter for layer variables.""" + # Add variables to layer's `(non_)trainable_weights` list(s). + variable = getter(*args, **kwargs) + trainable = kwargs.get("trainable", True) + if trainable and variable not in self._trainable_weights: + self._trainable_weights.append(variable) + elif not trainable and variable not in self._non_trainable_weights: + self._non_trainable_weights.append(variable) + return variable + + def build(self, input_shape): + """Create variables of the Cudnn RNN. + + It can be called manually before `__call__()` or automatically through + `__call__()`. In the former case, subsequent `__call__()`s will skip + creating variables. + Args: + input_shape: network input tensor shape, a python list or a TensorShape + object with 3 dimensions. + Raises: + ValueError: if input_shape has wrong dimension or unknown 3rd dimension. 
+ """ + if self.built: + return + + input_shape = tensor_shape.TensorShape(input_shape) + if input_shape.ndims != 3: + raise ValueError("Expecting input_shape with 3 dims, got %d" % + input_shape.ndims) + if input_shape[-1].value is None: + raise ValueError("The last dimension of the inputs to `CudnnRNN` " + "should be defined. Found `None`.") + self._input_size = input_shape[-1].value + self.input_spec = base_layer.InputSpec(ndim=3, axes={-1: self._input_size}) + + self._set_scope(None) + + # Not using base class `add_variable()` since the it calls + # `tf.get_variable()` with a callable initializer whereas here with a + # tensor. The difference is mandated to support forward-compatibility with + # Cudnn. + with vs.variable_scope( + self._scope, + reuse=self.built, + custom_getter=self._update_trainable_weights): + if self._kernel_initializer is None: + self._kernel_initializer = init_ops.glorot_uniform_initializer( + seed=self._seed, dtype=self.dtype) + if self._bias_initializer is None: + self._bias_initializer = init_ops.constant_initializer( + 0.0, dtype=self.dtype) + + weights = [ + self._kernel_initializer(sp, dtype=self.dtype) + for sp in self.canonical_weight_shapes + ] + biases = [ + self._bias_initializer(sp, dtype=self.dtype) + for sp in self.canonical_bias_shapes + ] + opaque_params_t = self._canonical_to_opaque(weights, biases) + + if vs.get_variable_scope().partitioner is not None: + logging.warn( + "Partitioner is not supported for Cudnn RNN layer variables, using " + "it will create forward-compatibility issues with future " + "CUDA/CuDNN generations.") + # Initialize opaque params with a tensor. + self.kernel = vs.get_variable( + "opaque_kernel", initializer=opaque_params_t, validate_shape=False) + # Create saveable in the outer scope of the cudnn subgraph, such that + # alternative subgraph with platform-independent rnn cells can load the + # checkpoints directly. 
if not (self.built or vs.get_variable_scope().reuse): + self._create_saveable() + self.built = True + + def call(self, inputs, initial_state=None, training=True): + """Runs the forward step for the RNN model. + + Args: + inputs: `3-D` tensor with shape `[time_len, batch_size, input_size]`. + initial_state: a tuple of tensor(s) of shape + `[num_layers * num_dirs, batch_size, num_units]`. If not provided, use + zero initial states. The tuple size is 2 for LSTM and 1 for other RNNs. + training: whether this operation will be used in training or inference. + Returns: + output: a tensor of shape `[time_len, batch_size, num_dirs * num_units]`. + It is a `concat([fwd_output, bak_output], axis=2)`. + output_states: a tuple of tensor(s) of the same shape and structure as + `initial_state`. + Raises: + ValueError: initial_state is not a tuple. + """ + if initial_state is not None and not isinstance(initial_state, tuple): + raise ValueError("Invalid initial_state type: %s, expecting tuple." % + type(initial_state)) + dtype = self.dtype + inputs = ops.convert_to_tensor(inputs, dtype=dtype) + + batch_size = array_ops.shape(inputs)[1] + if initial_state is None: + initial_state = self._zero_state(batch_size) + if self._rnn_mode == CUDNN_LSTM: + h, c = initial_state # pylint:disable=unbalanced-tuple-unpacking,unpacking-non-sequence + else: + h, = initial_state # pylint:disable=unbalanced-tuple-unpacking,unpacking-non-sequence + h = ops.convert_to_tensor(h, dtype=dtype) + if self._rnn_mode == CUDNN_LSTM: + c = ops.convert_to_tensor(c, dtype=dtype) + else: + # For model that doesn't take input_c, replace with a dummy tensor. 
+ c = array_ops.constant([], dtype=dtype) + outputs, (output_h, output_c) = self._forward(inputs, h, c, self.kernel, + training) + if self._rnn_mode == CUDNN_LSTM: + return outputs, (output_h, output_c) + else: + return outputs, (output_h,) + + def state_shape(self, batch_size): + raise NotImplementedError + + def _zero_state(self, batch_size): + res = [] + for sp in self.state_shape(batch_size): + res.append(array_ops.zeros(sp, dtype=self.dtype)) + return tuple(res) + + def _canonical_weight_shape(self, layer): + """Shapes of Cudnn canonical weight tensors for given layer.""" + if layer < 0 or layer >= self._num_layers: + raise ValueError("\'layer\' is not valid, got %s, expecting [%d, %d]" % + (layer, 0, self._num_layers-1)) + if not self._input_size: + raise RuntimeError( + "%s._canonical_weight_shape invoked before input shape is known" % + type(self).__name__) + + input_size = self._input_size + num_units = self._num_units + num_gates = self._num_params_per_layer // 2 + is_bidi = self._direction == CUDNN_RNN_BIDIRECTION + + if layer == 0: + wts_applied_on_inputs = [(num_units, input_size)] * num_gates + else: + if is_bidi: + wts_applied_on_inputs = [(num_units, 2 * num_units)] * num_gates + else: + wts_applied_on_inputs = [(num_units, num_units)] * num_gates + wts_applied_on_hidden_states = [(num_units, num_units)] * num_gates + tf_wts = wts_applied_on_inputs + wts_applied_on_hidden_states + return tf_wts if not is_bidi else tf_wts * 2 + + def _canonical_bias_shape(self, unused_layer): + """Shapes of Cudnn canonical bias tensors for given layer.""" + num_dirs = 1 if self._direction == CUDNN_RNN_UNIDIRECTION else 2 + return [[self._num_units]] * num_dirs * self._num_params_per_layer + + def _canonical_to_opaque(self, cu_weights, cu_biases): + if not self._input_size: + raise RuntimeError( + "%s._canonical_to_opaque invoked before input shape is known" % + type(self).__name__) + return cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params( + 
rnn_mode=self._rnn_mode, + num_layers=self._num_layers, + num_units=self._num_units, + input_size=self._input_size, + weights=cu_weights, + biases=cu_biases, + input_mode=self._input_mode, + direction=self._direction) + + def _forward(self, inputs, h, c, opaque_params, training): + output, output_h, output_c = cudnn_rnn_ops._cudnn_rnn( # pylint:disable=protected-access + inputs, + h, + c, + opaque_params, + training, + self._rnn_mode, + input_mode=self._input_mode, + direction=self._direction, + dropout=self._dropout, + seed=self._seed) + return output, (output_h, output_c) + + def _create_saveable(self): + """Create custom saveable for the Cudnn layer. + + Called during layer building process to make sharing checkpoints between + Cudnn and Cudnn-compatible RNNs easy. + Returns: + a `CudnnOpaqueParamsSaveable` object. + Raises: + RuntimeError: if any custom saveable is already created for this layer. + """ + if self._saveable is not None: + raise RuntimeError("Cudnn saveable already created.") + self._saveable = self._saveable_cls( # pylint:disable=not-callable + self.trainable_variables[0], + self.num_layers, + self.num_units, + self.input_size, + self.input_mode, + self.direction, + scope=vs.get_variable_scope(), + name="%s_saveable" % self.trainable_variables[0].op.name) + ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, self._saveable) + + +class CudnnLSTM(_CudnnRNN): + """Cudnn implementation of LSTM layer.""" + _rnn_mode = CUDNN_LSTM + _num_params_per_layer = CUDNN_LSTM_PARAMS_PER_LAYER + _saveable_cls = cudnn_rnn_ops.CudnnLSTMSaveable + + def state_shape(self, batch_size): + """Shape of Cudnn LSTM states. + + Shape is a 2-element tuple. Each is + [num_layers * num_dirs, batch_size, num_units] + Args: + batch_size: an int + Returns: + a tuple of python arrays. 
+ """ + return ([self.num_layers * self.num_dirs, batch_size, self.num_units], + [self.num_layers * self.num_dirs, batch_size, self.num_units]) + + +class _CudnnRNNNoInputC(_CudnnRNN): + """Abstract simple CudnnRNN layer without input_c.""" + + def state_shape(self, batch_size): + """Shape of the state of Cudnn RNN cells w/o. input_c. + + Shape is a 1-element tuple, + [num_layers * num_dirs, batch_size, num_units] + Args: + batch_size: an int + Returns: + a tuple of python arrays. + """ + return [self.num_layers * self.num_dirs, batch_size, self.num_units], + + +class CudnnGRU(_CudnnRNNNoInputC): + """Cudnn implementation of the GRU layer.""" + _rnn_mode = CUDNN_GRU + _num_params_per_layer = CUDNN_GRU_PARAMS_PER_LAYER + _saveable_cls = cudnn_rnn_ops.CudnnGRUSaveable + + +class CudnnRNNTanh(_CudnnRNNNoInputC): + """Cudnn implementation of the RNN-tanh layer.""" + _rnn_mode = CUDNN_RNN_TANH + _num_params_per_layer = CUDNN_RNN_TANH_PARAMS_PER_LAYER + _saveable_cls = cudnn_rnn_ops.CudnnRNNTanhSaveable + + +class CudnnRNNRelu(_CudnnRNNNoInputC): + """Cudnn implementation of the RNN-relu layer.""" + _rnn_mode = CUDNN_RNN_RELU + _num_params_per_layer = CUDNN_RNN_RELU_PARAMS_PER_LAYER + _saveable_cls = cudnn_rnn_ops.CudnnRNNReluSaveable diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py index bbf1bd9bca..7d658c746e 100644 --- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py +++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py @@ -717,12 +717,6 @@ _cudnn_rnn_common_doc_string = """ """ -def _check_direction(direction): - if direction not in (CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION): - raise ValueError("Invalid direction: %s, expect %s or %s" % - (direction, CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION)) - - def _check_rnn_mode(rnn_mode): if rnn_mode not in (CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_TANH, CUDNN_RNN_RELU): raise ValueError("Invalid rnn_mode: %s, expect one of (%s, 
%s, %s, %s)" % @@ -737,14 +731,31 @@ def _get_seed(seed): return seed, seed2 +def check_direction(direction): + """Check validity of direction.""" + if direction not in (CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION): + raise ValueError("Invalid direction: %s, expecting %s or %s" % + (direction, CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION)) + + +def check_input_mode(input_mode): + if input_mode not in (CUDNN_INPUT_LINEAR_MODE, CUDNN_INPUT_SKIP_MODE, + CUDNN_INPUT_AUTO_MODE): + raise ValueError("Invalid input_mode: %s, expect one of (%s, %s, %s)" % + (input_mode, CUDNN_INPUT_LINEAR_MODE, + CUDNN_INPUT_SKIP_MODE, CUDNN_INPUT_AUTO_MODE)) + + def _get_num_params(rnn_mode, num_layers, direction): """Return num params for given Cudnn config.""" if rnn_mode == CUDNN_LSTM: - num_params_per_layer = 8 + num_params_per_layer = CUDNN_LSTM_PARAMS_PER_LAYER elif rnn_mode == CUDNN_GRU: - num_params_per_layer = 6 - elif rnn_mode in (CUDNN_RNN_RELU, CUDNN_RNN_TANH): - num_params_per_layer = 2 + num_params_per_layer = CUDNN_GRU_PARAMS_PER_LAYER + elif rnn_mode == CUDNN_RNN_RELU: + num_params_per_layer = CUDNN_RNN_RELU_PARAMS_PER_LAYER + elif rnn_mode == CUDNN_RNN_TANH: + num_params_per_layer = CUDNN_RNN_TANH_PARAMS_PER_LAYER else: raise ValueError("Invalid \'rnn_mode\': %s", rnn_mode) num_params = num_layers * num_params_per_layer @@ -794,7 +805,8 @@ def _cudnn_rnn(inputs, outputs, output_h, output_c """ _check_rnn_mode(rnn_mode) - _check_direction(direction) + check_direction(direction) + check_input_mode(input_mode) seed, seed2 = random_seed.get_seed(seed) outputs, output_h, output_c, _ = gen_cudnn_rnn_ops.cudnn_rnn( input=inputs, @@ -1017,16 +1029,16 @@ def cudnn_rnn_tanh(inputs, seed, name) -def cudnn_rnn_params_to_canonical(rnn_mode, - num_layers, - num_units, - input_size, - params, - input_mode=CUDNN_INPUT_LINEAR_MODE, - direction=CUDNN_RNN_UNIDIRECTION, - dropout=0, - seed=0, - name=None): +def cudnn_rnn_opaque_params_to_canonical(rnn_mode, + num_layers, + num_units, + 
input_size, + params, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0, + seed=0, + name=None): """Convert cudnn opaque params to canonical. Args: @@ -1058,7 +1070,8 @@ def cudnn_rnn_params_to_canonical(rnn_mode, """ _check_rnn_mode(rnn_mode) - _check_direction(direction) + check_direction(direction) + check_input_mode(input_mode) num_params = _get_num_params(rnn_mode, num_layers, direction) seed, seed2 = random_seed.get_seed(seed) weights, biases = gen_cudnn_rnn_ops.cudnn_rnn_params_to_canonical( @@ -1077,17 +1090,17 @@ def cudnn_rnn_params_to_canonical(rnn_mode, return weights, biases -def cudnn_rnn_canonical_to_params(rnn_mode, - num_layers, - num_units, - input_size, - weights, - biases, - input_mode=CUDNN_INPUT_LINEAR_MODE, - direction=CUDNN_RNN_UNIDIRECTION, - dropout=0, - seed=0, - name=None): +def cudnn_rnn_canonical_to_opaque_params(rnn_mode, + num_layers, + num_units, + input_size, + weights, + biases, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0, + seed=0, + name=None): """Converts params from the canonical format to a specific format of cuDNN. Args: @@ -1119,7 +1132,8 @@ def cudnn_rnn_canonical_to_params(rnn_mode, ValueError: if rnn_mode or direction is invalid. 
""" _check_rnn_mode(rnn_mode) - _check_direction(direction) + check_direction(direction) + check_input_mode(input_mode) seed, seed2 = random_seed.get_seed(seed) return gen_cudnn_rnn_ops.cudnn_rnn_canonical_to_params( rnn_mode=rnn_mode, @@ -1136,16 +1150,16 @@ def cudnn_rnn_canonical_to_params(rnn_mode, name=name) -def cudnn_opaque_params_size(rnn_mode, - num_layers, - num_units, - input_size, - input_mode=CUDNN_INPUT_LINEAR_MODE, - direction=CUDNN_RNN_UNIDIRECTION, - dtype=dtypes.float32, - dropout=0, - seed=0, - name=None): +def cudnn_rnn_opaque_params_size(rnn_mode, + num_layers, + num_units, + input_size, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dtype=dtypes.float32, + dropout=0, + seed=0, + name=None): """Returns opaque params size for specific Cudnn config. Args: @@ -1176,7 +1190,8 @@ def cudnn_opaque_params_size(rnn_mode, ValueError: if rnn_mode or direction is invalid. """ _check_rnn_mode(rnn_mode) - _check_direction(direction) + check_direction(direction) + check_input_mode(input_mode) seed, seed2 = random_seed.get_seed(seed) return gen_cudnn_rnn_ops.cudnn_rnn_params_size( rnn_mode=rnn_mode, @@ -1278,7 +1293,7 @@ class _CudnnRNN(object): Returns: The calculated parameter buffer size. """ - return cudnn_opaque_params_size( + return cudnn_rnn_opaque_params_size( rnn_mode=self._rnn_mode, num_layers=self._num_layers, num_units=self._num_units, @@ -1327,7 +1342,7 @@ class _CudnnRNN(object): Returns: A function for the specific-to-canonical conversion. """ - return cudnn_rnn_params_to_canonical( + return cudnn_rnn_opaque_params_to_canonical( rnn_mode=self._rnn_mode, num_layers=self._num_layers, num_units=self._num_units, @@ -1348,7 +1363,7 @@ class _CudnnRNN(object): Returns: A function for the canonical-to-params-to-specific conversion.. 
""" - return cudnn_rnn_canonical_to_params( + return cudnn_rnn_canonical_to_opaque_params( rnn_mode=self._rnn_mode, num_layers=self._num_layers, num_units=self._num_units, -- GitLab From 76eb8726160a192ebe6ac5e61d0a0a539cc0dc1a Mon Sep 17 00:00:00 2001 From: Colin Raffel Date: Wed, 4 Oct 2017 18:51:57 -0700 Subject: [PATCH 033/909] Fix documentation error in tf.reverse docstring (#1) The first example in the tf.reverse docstring causes a ValueError: ```Python In [1]: import tensorflow as tf In [2]: t = tf.constant([[[[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11]], [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]]]) In [3]: dims = -1 In [4]: sess = tf.InteractiveSession() In [5]: tf.reverse(t, dims).eval() --------------------------------------------------------------------------- ValueError Traceback (most recent call last) in () ----> 1 tf.reverse(t, dims).eval() /Users/craffel/.pyenv/versions/2.7.13/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.pyc in reverse(tensor, axis, name) 2332 2333 def reverse(tensor, axis, name=None): -> 2334 return gen_array_ops.reverse_v2(tensor, axis, name) 2335 reverse.__doc__ = gen_array_ops.reverse_v2.__doc__ 2336 /Users/craffel/.pyenv/versions/2.7.13/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.pyc in reverse_v2(tensor, axis, name) 2697 """ 2698 result = _op_def_lib.apply_op("ReverseV2", tensor=tensor, axis=axis, -> 2699 name=name) 2700 return result 2701 /Users/craffel/.pyenv/versions/2.7.13/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.pyc in apply_op(self, op_type_name, name, **keywords) 765 op = g.create_op(op_type_name, inputs, output_types, name=scope, 766 input_types=input_types, attrs=attr_protos, --> 767 op_def=op_def) 768 if output_structure: 769 outputs = op.outputs /Users/craffel/.pyenv/versions/2.7.13/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, 
compute_device) 2506 original_op=self._default_original_op, op_def=op_def) 2507 if compute_shapes: -> 2508 set_shapes_for_outputs(ret) 2509 self._add_op(ret) 2510 self._record_op_seen_by_control_dependencies(ret) /Users/craffel/.pyenv/versions/2.7.13/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in set_shapes_for_outputs(op) 1871 shape_func = _call_cpp_shape_fn_and_require_op 1872 -> 1873 shapes = shape_func(op) 1874 if shapes is None: 1875 raise RuntimeError( /Users/craffel/.pyenv/versions/2.7.13/lib/python2.7/site-packages/tensorflow/python/framework/ops.pyc in call_with_requiring(op) 1821 1822 def call_with_requiring(op): -> 1823 return call_cpp_shape_fn(op, require_shape_fn=True) 1824 1825 _call_cpp_shape_fn_and_require_op = call_with_requiring /Users/craffel/.pyenv/versions/2.7.13/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.pyc in call_cpp_shape_fn(op, input_tensors_needed, input_tensors_as_shapes_needed, debug_python_shape_fn, require_shape_fn) 608 res = _call_cpp_shape_fn_impl(op, input_tensors_needed, 609 input_tensors_as_shapes_needed, --> 610 debug_python_shape_fn, require_shape_fn) 611 if not isinstance(res, dict): 612 # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op). /Users/craffel/.pyenv/versions/2.7.13/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.pyc in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, debug_python_shape_fn, require_shape_fn) 674 missing_shape_fn = True 675 else: --> 676 raise ValueError(err.message) 677 678 if missing_shape_fn: ValueError: Shape must be rank 1 but is rank 0 for 'ReverseV2' (op: 'ReverseV2') with input shapes: [1,2,3,4], []. 
``` --- tensorflow/core/ops/array_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index ad111fc6b8..8397ff52aa 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -1117,7 +1117,7 @@ For example: # [20, 21, 22, 23]]]] # tensor 't' shape is [1, 2, 3, 4] -# 'dims' is [3] or 'dims' is -1 +# 'dims' is [3] or 'dims' is [-1] reverse(t, dims) ==> [[[[ 3, 2, 1, 0], [ 7, 6, 5, 4], [ 11, 10, 9, 8]], -- GitLab From f6e187acdd9bd1d3ac2d1d08809fffb25f4bd105 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 4 Oct 2017 19:07:31 -0700 Subject: [PATCH 034/909] Update the release notes with information about tf.data. Also adds a short porting guide to the tf.contrib.data README. PiperOrigin-RevId: 171097798 --- RELEASE.md | 15 +++++++++++++ tensorflow/contrib/data/README.md | 37 +++++++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 634b31b82b..c5f1e8b309 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,16 @@ # Release 1.4.0 ## Major Features And Improvements +* `tf.data` is now part of the core TensorFlow API. + * The API is now subject to backwards compatibility guarantees. + * For a guide to migrating from the `tf.contrib.data` API, see the + [README](https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/contrib/data/README.md). + * Major new features include `Dataset.from_generator()` (for building an input + pipeline from a Python generator), and the `Dataset.apply()` method for + applying custom transformation functions. + * Several custom transformation functions have been added, including + `tf.contrib.data.batch_and_drop_remainder()` and + `tf.contrib.data.sloppy_interleave()`. * Java: * Generics (e.g., `Tensor`) for improved type-safety (courtesy @andrewcmyers). * Support for multi-dimensional string tensors. 
@@ -16,6 +26,11 @@ flexible and reproducible package, is available via the new `tf.contrib.data.Dataset.from_generator` method! +## Breaking Changes to the API +* The signature of the `tf.contrib.data.rejection_resample()` function has been + changed. It now returns a function that can be used as an argument to + `Dataset.apply()`. + # Release 1.3.0 See also [TensorBoard 0.1.4](https://github.com/tensorflow/tensorboard/releases/tag/0.1.4) release notes. diff --git a/tensorflow/contrib/data/README.md b/tensorflow/contrib/data/README.md index 04f0560b09..30e909111f 100644 --- a/tensorflow/contrib/data/README.md +++ b/tensorflow/contrib/data/README.md @@ -2,9 +2,38 @@ ===================== NOTE: The `tf.contrib.data` module has been deprecated. Use `tf.data` instead. +We are continuing to support existing code using the `tf.contrib.data` APIs in +the current version of TensorFlow, but will eventually remove support. The +`tf.data` APIs are subject to backwards compatibility guarantees. -This directory contains the Python API for the `tf.contrib.data.Dataset` and -`tf.contrib.data.Iterator` classes, which can be used to build input pipelines. +Porting your code to `tf.data` +------------------------------ -The documentation for `tf.data` API has moved to the programmers' -guide, [here](../../docs_src/programmers_guide/datasets.md). +The `tf.contrib.data.Dataset` class has been renamed to `tf.data.Dataset`, and +the `tf.contrib.data.Iterator` class has been renamed to `tf.data.Iterator`. +Most code can be ported by removing `.contrib` from the names of the classes. +However, there are some small differences, which are outlined below. + +The arguments accepted by the `Dataset.map()` transformation have changed: + +* `dataset.map(..., num_threads=T)` is now `dataset.map(..., num_parallel_calls=T)`. +* `dataset.map(..., output_buffer_size=B)` is now + `dataset.map(...).prefetch(B)`. 
+ +Some transformations have been removed from `tf.data.Dataset`, and you must +instead apply them using the `Dataset.apply()` transformation. The full list of +changes is as follows: + +* `dataset.dense_to_sparse_batch(...)` is now + `dataset.apply(tf.contrib.data.dense_to_sparse_batch(...))`. +* `dataset.enumerate(...)` is now + `dataset.apply(tf.contrib.data.enumerate_dataset(...))`. +* `dataset.group_by_window(...)` is now + `dataset.apply(tf.contrib.data.group_by_window(...))`. +* `dataset.ignore_errors()` is now + `dataset.apply(tf.contrib.data.ignore_errors())`. +* `dataset.unbatch()` is now `dataset.apply(tf.contrib.data.unbatch())`. + +The `Dataset.make_dataset_resource()` and `Iterator.dispose_op()` methods have +been removed from the API. Please open a GitHub issue if you have a need for +either of these. -- GitLab From 73b1adc5085ee8f4a8a190287e3e4d33fe1409f8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 19:07:51 -0700 Subject: [PATCH 035/909] Renames variable for consistency with flag. 
PiperOrigin-RevId: 171097818 --- .../examples/speech_commands/test_streaming_accuracy.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/examples/speech_commands/test_streaming_accuracy.cc b/tensorflow/examples/speech_commands/test_streaming_accuracy.cc index 5a98264401..2972ab778b 100644 --- a/tensorflow/examples/speech_commands/test_streaming_accuracy.cc +++ b/tensorflow/examples/speech_commands/test_streaming_accuracy.cc @@ -231,7 +231,7 @@ int main(int argc, char* argv[]) { } const int64 clip_duration_samples = (clip_duration_ms * sample_rate) / 1000; - const int64 sample_stride_samples = (clip_stride_ms * sample_rate) / 1000; + const int64 clip_stride_samples = (clip_stride_ms * sample_rate) / 1000; Tensor audio_data_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({clip_duration_samples, 1})); @@ -246,7 +246,7 @@ int main(int argc, char* argv[]) { const int64 audio_data_end = (sample_count - clip_duration_ms); for (int64 audio_data_offset = 0; audio_data_offset < audio_data_end; - audio_data_offset += sample_stride_samples) { + audio_data_offset += clip_stride_samples) { const float* input_start = &(audio_data[audio_data_offset]); const float* input_end = input_start + clip_duration_samples; std::copy(input_start, input_end, audio_data_tensor.flat().data()); -- GitLab From c38773f18bfdce1de16ab5110e0cbbd50f0d6a79 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 4 Oct 2017 19:11:41 -0700 Subject: [PATCH 036/909] [XLA] Fix build of dumped_computation_to_text after change that removed an arg from CompileExecutable. 
PiperOrigin-RevId: 171098077 --- tensorflow/compiler/xla/tools/dumped_computation_to_text.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc b/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc index 2a3a880328..78d8fb1f43 100644 --- a/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc +++ b/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc @@ -61,9 +61,9 @@ void RealMain(tensorflow::gtl::ArraySlice args, bool compile) { layouts.push_back(&program_shape->parameters(i)); } StatusOr> executable = - local_service->CompileExecutable( - computation.handle(), layouts, &program_shape->result(), - /*device_ordinal=*/0, /*has_hybrid_result=*/true); + local_service->CompileExecutable(computation.handle(), layouts, + &program_shape->result(), + /*device_ordinal=*/0); const HloModule& module = executable.ValueOrDie()->module(); -- GitLab From 0b863e0fef15f470265e0a87e660e421c6bc5ea1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 19:07:51 -0700 Subject: [PATCH 037/909] Renames variable for consistency with flag. 
PiperOrigin-RevId: 171097818 --- tensorflow/compiler/xla/tools/dumped_computation_to_text.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc b/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc index 78d8fb1f43..2a3a880328 100644 --- a/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc +++ b/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc @@ -61,9 +61,9 @@ void RealMain(tensorflow::gtl::ArraySlice args, bool compile) { layouts.push_back(&program_shape->parameters(i)); } StatusOr> executable = - local_service->CompileExecutable(computation.handle(), layouts, - &program_shape->result(), - /*device_ordinal=*/0); + local_service->CompileExecutable( + computation.handle(), layouts, &program_shape->result(), + /*device_ordinal=*/0, /*has_hybrid_result=*/true); const HloModule& module = executable.ValueOrDie()->module(); -- GitLab From f2114a01130ded172ea4afb8f3ca20294ae62961 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 4 Oct 2017 19:11:41 -0700 Subject: [PATCH 038/909] [XLA] Fix build of dumped_computation_to_text after change that removed an arg from CompileExecutable. 
PiperOrigin-RevId: 171098077 --- tensorflow/compiler/xla/tools/dumped_computation_to_text.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc b/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc index 2a3a880328..78d8fb1f43 100644 --- a/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc +++ b/tensorflow/compiler/xla/tools/dumped_computation_to_text.cc @@ -61,9 +61,9 @@ void RealMain(tensorflow::gtl::ArraySlice args, bool compile) { layouts.push_back(&program_shape->parameters(i)); } StatusOr> executable = - local_service->CompileExecutable( - computation.handle(), layouts, &program_shape->result(), - /*device_ordinal=*/0, /*has_hybrid_result=*/true); + local_service->CompileExecutable(computation.handle(), layouts, + &program_shape->result(), + /*device_ordinal=*/0); const HloModule& module = executable.ValueOrDie()->module(); -- GitLab From ef2ee630e8fe290b06363f13ff440b4efcec9c81 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 19:12:55 -0700 Subject: [PATCH 039/909] Fixes docs. PiperOrigin-RevId: 171098172 --- tensorflow/docs_src/tutorials/audio_recognition.md | 2 +- tensorflow/examples/speech_commands/freeze.py | 2 +- .../examples/speech_commands/generate_streaming_test_wav.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/tutorials/audio_recognition.md b/tensorflow/docs_src/tutorials/audio_recognition.md index 1ede915c01..670e480b12 100644 --- a/tensorflow/docs_src/tutorials/audio_recognition.md +++ b/tensorflow/docs_src/tutorials/audio_recognition.md @@ -361,7 +361,7 @@ This will output information about the number of words correctly matched, how many were given the wrong labels, and how many times the model triggered when there was no real word spoken. 
There are various parameters that control how the signal averaging works, including `--average_window_ms` which sets the length of -time to average results over, `--sample_stride_ms` which is the time between +time to average results over, `--clip_stride_ms` which is the time between applications of the model, `--suppression_ms` which stops subsequent word detections from triggering for a certain time after an initial one is found, and `--detection_threshold`, which controls how high the average score must be diff --git a/tensorflow/examples/speech_commands/freeze.py b/tensorflow/examples/speech_commands/freeze.py index cc2df9660a..c8671d9c41 100644 --- a/tensorflow/examples/speech_commands/freeze.py +++ b/tensorflow/examples/speech_commands/freeze.py @@ -153,7 +153,7 @@ if __name__ == '__main__': '--window_stride_ms', type=float, default=10.0, - help='How long each spectrogram timeslice is',) + help='How long the stride is between spectrogram timeslices',) parser.add_argument( '--dct_coefficient_count', type=int, diff --git a/tensorflow/examples/speech_commands/generate_streaming_test_wav.py b/tensorflow/examples/speech_commands/generate_streaming_test_wav.py index ac7c11856e..053206ae2f 100644 --- a/tensorflow/examples/speech_commands/generate_streaming_test_wav.py +++ b/tensorflow/examples/speech_commands/generate_streaming_test_wav.py @@ -240,7 +240,7 @@ if __name__ == '__main__': '--window_stride_ms', type=float, default=10.0, - help='How long each spectrogram timeslice is',) + help='How long the stride is between spectrogram timeslices',) parser.add_argument( '--dct_coefficient_count', type=int, -- GitLab From 2c3bf9eff79156e32512e8d6da2179cd044167b8 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 4 Oct 2017 19:14:02 -0700 Subject: [PATCH 040/909] [Windows] Include tf.contrib.image ops as part of the Windows build. Fixes #9672. 
PiperOrigin-RevId: 171098255 --- tensorflow/contrib/cmake/tf_core_kernels.cmake | 8 ++++++++ tensorflow/contrib/cmake/tf_core_ops.cmake | 2 ++ tensorflow/contrib/cmake/tf_python.cmake | 4 ++++ tensorflow/contrib/cmake/tf_tests.cmake | 1 + tensorflow/contrib/image/BUILD | 1 + tensorflow/contrib/image/python/ops/distort_image_ops.py | 3 ++- .../python/ops/single_image_random_dot_stereograms.py | 3 ++- 7 files changed, 20 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index 61c6686ee0..46c680aad5 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -74,6 +74,13 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) #"${tensorflow_source_dir}/tensorflow/contrib/ffmpeg/encode_audio_op.cc" "${tensorflow_source_dir}/tensorflow/contrib/framework/kernels/zero_initializer_op.cc" "${tensorflow_source_dir}/tensorflow/contrib/framework/ops/variable_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/adjust_hsv_in_yiq_op.cc" + "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/bipartite_match_op.cc" + "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/image_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/single_image_random_dot_stereograms_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/image/ops/distort_image_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/image/ops/image_ops.cc" + "${tensorflow_source_dir}/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc" "${tensorflow_source_dir}/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc" "${tensorflow_source_dir}/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc" "${tensorflow_source_dir}/tensorflow/contrib/nccl/kernels/nccl_manager.cc" @@ -167,6 +174,7 @@ endif(WIN32) file(GLOB_RECURSE tf_core_gpu_kernels_srcs "${tensorflow_source_dir}/tensorflow/core/kernels/*.cu.cc" 
"${tensorflow_source_dir}/tensorflow/contrib/framework/kernels/zero_initializer_op_gpu.cu.cc" + "${tensorflow_source_dir}/tensorflow/contrib/image/kernels/*.cu.cc" "${tensorflow_source_dir}/tensorflow/contrib/rnn/kernels/*.cu.cc" "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/kernels/*.cu.cc" ) diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index 78bccc08a3..dc9973917e 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -84,6 +84,8 @@ GENERATE_CONTRIB_OP_LIBRARY(factorization_factorization "${tensorflow_source_dir GENERATE_CONTRIB_OP_LIBRARY(framework_variable "${tensorflow_source_dir}/tensorflow/contrib/framework/ops/variable_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(input_pipeline "${tensorflow_source_dir}/tensorflow/contrib/input_pipeline/ops/input_pipeline_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(image "${tensorflow_source_dir}/tensorflow/contrib/image/ops/image_ops.cc") +GENERATE_CONTRIB_OP_LIBRARY(image_distort_image "${tensorflow_source_dir}/tensorflow/contrib/image/ops/distort_image_ops.cc") +GENERATE_CONTRIB_OP_LIBRARY(image_sirds "${tensorflow_source_dir}/tensorflow/contrib/image/ops/single_image_random_dot_stereograms_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(layers_sparse_feature_cross "${tensorflow_source_dir}/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc") GENERATE_CONTRIB_OP_LIBRARY(memory_stats "${tensorflow_source_dir}/tensorflow/contrib/memory_stats/ops/memory_stats_ops.cc") GENERATE_CONTRIB_OP_LIBRARY(nccl "${tensorflow_source_dir}/tensorflow/contrib/nccl/ops/nccl_ops.cc") diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 1e78f1e983..bb3e69d53c 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -773,6 +773,10 @@ GENERATE_PYTHON_OP_LIB("contrib_input_pipeline_ops" DESTINATION 
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/input_pipeline/ops/gen_input_pipeline_ops.py) GENERATE_PYTHON_OP_LIB("contrib_image_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/image/ops/gen_image_ops.py) +GENERATE_PYTHON_OP_LIB("contrib_image_distort_image_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/image/ops/gen_distort_image_ops.py) +GENERATE_PYTHON_OP_LIB("contrib_image_sirds_ops" + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/image/ops/gen_single_image_random_dot_stereograms_ops.py) GENERATE_PYTHON_OP_LIB("contrib_layers_sparse_feature_cross_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/layers/ops/gen_sparse_feature_cross_op.py) GENERATE_PYTHON_OP_LIB("contrib_memory_stats_ops" diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index ba78e87ac0..658d19e493 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -152,6 +152,7 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/training/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/data/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/factorization/*_test.py" + "${tensorflow_source_dir}/tensorflow/contrib/image/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/keras/python/keras/integration_test.py" "${tensorflow_source_dir}/tensorflow/contrib/nearest_neighbor/python/kernel_tests/*_test.py" "${tensorflow_source_dir}/tensorflow/contrib/seq2seq/python/kernel_tests/*_test.py" diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD index a18f14112e..d0600d4668 100755 --- a/tensorflow/contrib/image/BUILD +++ b/tensorflow/contrib/image/BUILD @@ -211,6 +211,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":image_py", + ":single_image_random_dot_stereograms_ops", "//tensorflow/contrib/util:util_py", "//tensorflow/python:framework_ops", 
"//tensorflow/python:platform", diff --git a/tensorflow/contrib/image/python/ops/distort_image_ops.py b/tensorflow/contrib/image/python/ops/distort_image_ops.py index 39f023a2b4..06e8e4ee72 100644 --- a/tensorflow/contrib/image/python/ops/distort_image_ops.py +++ b/tensorflow/contrib/image/python/ops/distort_image_ops.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.image.ops import gen_distort_image_ops from tensorflow.contrib.util import loader from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -132,7 +133,7 @@ def adjust_hsv_in_yiq(image, orig_dtype = image.dtype flt_image = image_ops.convert_image_dtype(image, dtypes.float32) - rgb_altered = _distort_image_ops.adjust_hsv_in_yiq( + rgb_altered = gen_distort_image_ops.adjust_hsv_in_yiq( flt_image, delta_hue, scale_saturation, scale_value) return image_ops.convert_image_dtype(rgb_altered, orig_dtype) diff --git a/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py b/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py index 79261c5e75..5cccf26028 100755 --- a/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py +++ b/tensorflow/contrib/image/python/ops/single_image_random_dot_stereograms.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.image.ops import gen_single_image_random_dot_stereograms_ops from tensorflow.contrib.util import loader from tensorflow.python.framework import ops from tensorflow.python.platform import resource_loader @@ -107,7 +108,7 @@ def single_image_random_dot_stereograms( 'depth_values' """ - result = _sirds_ops.single_image_random_dot_stereograms( + result = gen_single_image_random_dot_stereograms_ops.single_image_random_dot_stereograms( # pylint: disable=line-too-long 
depth_values=depth_values, hidden_surface_removal=hidden_surface_removal, convergence_dots_size=convergence_dots_size, -- GitLab From a3e5b1628322102914a46a5fbfca2db5cb8b9e11 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 19:28:04 -0700 Subject: [PATCH 041/909] Avoids adding duplicate legacy_init_op to the saved_model's exported meta graph. Previously, when the user restores graph from one meta graph generated from saved_model and then re-generates another saved model, the re-generated model will be invalid because it will contain duplicate legacy_init_ops. PiperOrigin-RevId: 171099152 --- tensorflow/python/saved_model/builder_impl.py | 7 ++++- .../python/saved_model/saved_model_test.py | 30 ++++++++++++++++++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py index 73a3f9075d..16651ffebc 100644 --- a/tensorflow/python/saved_model/builder_impl.py +++ b/tensorflow/python/saved_model/builder_impl.py @@ -140,11 +140,16 @@ class SavedModelBuilder(object): Raises: TypeError if legacy init op is not of type `Operation`. + AssertionError if the graph already contains one or more legacy init ops. """ if legacy_init_op is not None: if not isinstance(legacy_init_op, ops.Operation): raise TypeError("legacy_init_op needs to be an Operation: %r" % legacy_init_op) + if ops.get_collection(constants.LEGACY_INIT_OP_KEY): + raise AssertionError( + "graph already contains one or more legacy init ops under the " + "collection {}.".format(constants.LEGACY_INIT_OP_KEY)) ops.add_to_collection(constants.LEGACY_INIT_OP_KEY, legacy_init_op) def _add_main_op(self, main_op): @@ -258,7 +263,7 @@ class SavedModelBuilder(object): Raises: AssertionError: If the variables for the SavedModel have not been saved - yet. + yet, or if the graph already contains one or more legacy init ops. 
""" if not self._has_saved_variables: raise AssertionError( diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py index 5639e6855d..c6d2c32293 100644 --- a/tensorflow/python/saved_model/saved_model_test.py +++ b/tensorflow/python/saved_model/saved_model_test.py @@ -1,4 +1,4 @@ -## Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -637,6 +637,34 @@ class SavedModelTest(test.TestCase): # the legacy_init_op, following a restore. self.assertEqual(3, ops.get_collection("v")[2].eval()) + def testLegacyInitOpWithNonEmptyCollection(self): + export_dir = os.path.join(test.get_temp_dir(), + "test_legacy_init_op_with_non_empty_collection") + builder = saved_model_builder.SavedModelBuilder(export_dir) + + with self.test_session(graph=ops.Graph()) as sess: + # Initialize variable `v1` to 1. + v1 = variables.Variable(1, name="v1") + ops.add_to_collection("v", v1) + + # Initialize another variable `v2` to 42. + v2 = variables.Variable(42, name="v2", trainable=False, collections=[]) + ops.add_to_collection("v", v2) + + # Set up an assignment op to be run as part of the legacy_init_op. + assign_v2 = state_ops.assign(v2, v1) + legacy_init_op = control_flow_ops.group(assign_v2, name="legacy_init_op") + + sess.run(variables.global_variables_initializer()) + + ops.add_to_collection(constants.LEGACY_INIT_OP_KEY, + control_flow_ops.no_op()) + # AssertionError should be raised since the LEGACY_INIT_OP_KEY collection + # is not empty and we don't support multiple init ops. 
+ with self.assertRaises(AssertionError): + builder.add_meta_graph_and_variables( + sess, ["foo"], legacy_init_op=legacy_init_op) + def testMultipleAssets(self): export_dir = os.path.join(test.get_temp_dir(), "test_multiple_assets") builder = saved_model_builder.SavedModelBuilder(export_dir) -- GitLab From 2f0787e1c8a7090fd231dac217e26824d8bc09c3 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Wed, 4 Oct 2017 19:31:06 -0700 Subject: [PATCH 042/909] Change all quotes for TF_CONFIG from ' to " as JSON requires that. PiperOrigin-RevId: 171099341 --- tensorflow/python/estimator/training.py | 64 ++++++++++++------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 1bed19760b..17c072566a 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -328,29 +328,29 @@ def train_and_evaluate(estimator, train_spec, eval_spec): Setting environment variable depends on the platform. For example, on Linux, it can be done as follows (`$` is the shell prompt): ``` - $ TF_CONFIG="" python train_model.py + $ TF_CONFIG='' python train_model.py ``` For the content in `TF_CONFIG`, assume that the training cluster spec looks like: ``` - cluster = {'chief': ['host0:2222'], - 'worker': ['host1:2222', 'host2:2222', 'host3:2222'], - 'ps': ['host4:2222', 'host5:2222']} + cluster = {"chief": ["host0:2222"], + "worker": ["host1:2222", "host2:2222", "host3:2222"], + "ps": ["host4:2222", "host5:2222"]} ``` Example of `TF_CONFIG` for chief training worker (must have one and only one): ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. 
- TF_CONFIG="{ - 'cluster': { - 'chief': ['host0:2222'], - 'worker': ['host1:2222', 'host2:2222', 'host3:2222'], - 'ps': ['host4:2222', 'host5:2222'] + TF_CONFIG='{ + "cluster": { + "chief": ["host0:2222"], + "worker": ["host1:2222", "host2:2222", "host3:2222"], + "ps": ["host4:2222", "host5:2222"] }, - 'task': {'type': 'chief', 'index': 0} - }" + "task": {"type": "chief", "index": 0} + }' ``` Note that the chief worker also does the model training job, similar to other non-chief training workers (see next paragraph). In addition to the model @@ -362,14 +362,14 @@ def train_and_evaluate(estimator, train_spec, eval_spec): ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. - TF_CONFIG="{ - 'cluster': { - 'chief': ['host0:2222'], - 'worker': ['host1:2222', 'host2:2222', 'host3:2222'], - 'ps': ['host4:2222', 'host5:2222'] + TF_CONFIG='{ + "cluster": { + "chief": ["host0:2222"], + "worker": ["host1:2222", "host2:2222", "host3:2222"], + "ps": ["host4:2222", "host5:2222"] }, - 'task': {'type': 'worker', 'index': 0} - }" + "task": {"type": "worker", "index": 0} + }' ``` where the `task.index` should be set as 0, 1, 2, in this example, respectively for non-chief training workers. @@ -378,14 +378,14 @@ def train_and_evaluate(estimator, train_spec, eval_spec): ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. - TF_CONFIG="{ - 'cluster': { - 'chief': ['host0:2222'], - 'worker': ['host1:2222', 'host2:2222', 'host3:2222'], - 'ps': ['host4:2222', 'host5:2222'] + TF_CONFIG='{ + "cluster": { + "chief": ["host0:2222"], + "worker": ["host1:2222", "host2:2222", "host3:2222"], + "ps": ["host4:2222", "host5:2222"] }, - 'task': {'type': 'ps', 'index': 0} - }" + "task": {"type": "ps", "index": 0} + }' ``` where the `task.index` should be set as 0 and 1, in this example, respectively for parameter servers. 
@@ -396,14 +396,14 @@ def train_and_evaluate(estimator, train_spec, eval_spec): ``` # This should be a JSON string, which is set as environment variable. Usually # the cluster manager handles that. - TF_CONFIG="{ - 'cluster': { - 'chief': ['host0:2222'], - 'worker': ['host1:2222', 'host2:2222', 'host3:2222'], - 'ps': ['host4:2222', 'host5:2222'] + TF_CONFIG='{ + "cluster": { + "chief": ["host0:2222"], + "worker": ["host1:2222", "host2:2222", "host3:2222"], + "ps": ["host4:2222", "host5:2222"] }, - 'task': {'type': 'evaluator', 'index': 0} - }" + "task": {"type": "evaluator", "index": 0} + }' ``` Args: -- GitLab From 5267759301eeda724c788c6eb9fdaf624c644a7e Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Wed, 4 Oct 2017 19:42:46 -0700 Subject: [PATCH 043/909] [XLA] Add shape print-out to message for rank-test failure. PiperOrigin-RevId: 171100052 --- tensorflow/compiler/xla/tests/literal_test_util.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 4d8b50fbbf..061a4e190f 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -49,7 +49,9 @@ namespace xla { AssertEqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); } } else { - ASSERT_EQ(ShapeUtil::Rank(expected), ShapeUtil::Rank(actual)); + ASSERT_EQ(ShapeUtil::Rank(expected), ShapeUtil::Rank(actual)) + << "want rank of: " << ShapeUtil::HumanString(expected) + << " got rank of: " << ShapeUtil::HumanString(actual); ASSERT_EQ(expected.element_type(), actual.element_type()) << PrimitiveType_Name(expected.element_type()) << " vs " << PrimitiveType_Name(actual.element_type()); -- GitLab From df2768c93b60fd60e353cebddc27de8390bebd4b Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Wed, 4 Oct 2017 20:17:39 -0700 Subject: [PATCH 044/909] Fix silly typo PiperOrigin-RevId: 171102230 --- 
tensorflow/contrib/quantize/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/quantize/__init__.py b/tensorflow/contrib/quantize/__init__.py index f137723cb6..5d4e4575c9 100644 --- a/tensorflow/contrib/quantize/__init__.py +++ b/tensorflow/contrib/quantize/__init__.py @@ -25,7 +25,7 @@ from tensorflow.contrib.quantize.python.quantize_graph import * from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - "create_eval_graph," + "create_eval_graph", "create_training_graph", ] -- GitLab From 929e9c5578c3d38df28da57ca22d1e4ce2600987 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 21:21:50 -0700 Subject: [PATCH 045/909] Fix docstring. PiperOrigin-RevId: 171105949 --- tensorflow/contrib/gan/python/namedtuples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/gan/python/namedtuples.py b/tensorflow/contrib/gan/python/namedtuples.py index 27512526c4..48f5e8e47d 100644 --- a/tensorflow/contrib/gan/python/namedtuples.py +++ b/tensorflow/contrib/gan/python/namedtuples.py @@ -120,7 +120,7 @@ class GANLoss( """GANLoss contains the generator and discriminator losses. Args: - generator_loss: A tensor for the generator loss.. + generator_loss: A tensor for the generator loss. discriminator_loss: A tensor for the discriminator loss. """ -- GitLab From 165dd023351359171b0fe4f19c63a42aac4c2e47 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 21:33:15 -0700 Subject: [PATCH 046/909] Update ops-related pbtxt files. 
PiperOrigin-RevId: 171106509 --- .../core/ops/compat/ops_history.v1.pbtxt | 99 ++++++++++++ tensorflow/core/ops/ops.pbtxt | 143 ++++++++++++++---- 2 files changed, 213 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index e28b43c916..950422305e 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -12835,6 +12835,33 @@ op { } } } +op { + name: "LogMatrixDeterminant" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "sign" + type_attr: "T" + } + output_arg { + name: "log_abs_determinant" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } +} op { name: "LogSoftmax" input_arg { @@ -20216,6 +20243,78 @@ op { } is_stateful: true } +op { + name: "RandomPoissonV2" + input_arg { + name: "shape" + type_attr: "S" + } + input_arg { + name: "rate" + type_attr: "R" + } + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "S" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "R" + type: "type" + default_value { + type: DT_DOUBLE + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "dtype" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} op { name: "RandomShuffle" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index b8f827f1f7..cbde462325 100644 --- a/tensorflow/core/ops/ops.pbtxt 
+++ b/tensorflow/core/ops/ops.pbtxt @@ -11632,6 +11632,38 @@ op { summary: "Computes natural logarithm of (1 + x) element-wise." description: "I.e., \\\\(y = \\log_e (1 + x)\\\\)." } +op { + name: "LogMatrixDeterminant" + input_arg { + name: "input" + description: "Shape is `[N, M, M]`." + type_attr: "T" + } + output_arg { + name: "sign" + description: "The signs of the log determinants of the inputs. Shape is `[N]`." + type_attr: "T" + } + output_arg { + name: "log_abs_determinant" + description: "The logs of the absolute values of the determinants\nof the N input matrices. Shape is `[N]`." + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } + summary: "Computes the sign and the log of the absolute value of the determinant of" + description: "one or more square matrices.\n\nThe input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions\nform square matrices. The outputs are two tensors containing the signs and\nabsolute values of the log determinants for all N input submatrices\n`[..., :, :]` such that the determinant = sign*exp(log_abs_determinant).\nThe log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU\nis the LU decomposition of the input and P is the corresponding\npermutation matrix." +} op { name: "LogSoftmax" input_arg { @@ -18778,6 +18810,85 @@ op { description: "This op uses two algorithms, depending on rate. If rate >= 10, then\nthe algorithm by Hormann is used to acquire samples via\ntransformation-rejection.\nSee http://www.sciencedirect.com/science/article/pii/0167668793909974.\n\nOtherwise, Knuth\'s algorithm is used to acquire samples via multiplying uniform\nrandom variables.\nSee Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer\nProgramming, Volume 2. 
Addison Wesley" is_stateful: true } +op { + name: "RandomPoissonV2" + input_arg { + name: "shape" + description: "1-D integer tensor. Shape of independent samples to draw from each\ndistribution described by the shape parameters given in rate." + type_attr: "S" + } + input_arg { + name: "rate" + description: "A tensor in which each scalar is a \"rate\" parameter describing the\nassociated poisson distribution." + type_attr: "R" + } + output_arg { + name: "output" + description: "A tensor with shape `shape + shape(rate)`. Each slice\n`[:, ..., :, i0, i1, ...iN]` contains the samples drawn for\n`rate[i0, i1, ...iN]`." + type_attr: "dtype" + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + description: "If either `seed` or `seed2` are set to be non-zero, the random number\ngenerator is seeded by the given seed. Otherwise, it is seeded by a\nrandom seed." + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + description: "A second seed to avoid seed collision." + } + attr { + name: "S" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "R" + type: "type" + default_value { + type: DT_DOUBLE + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "dtype" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + summary: "Outputs random values from the Poisson distribution(s) described by rate." + description: "This op uses two algorithms, depending on rate. If rate >= 10, then\nthe algorithm by Hormann is used to acquire samples via\ntransformation-rejection.\nSee http://www.sciencedirect.com/science/article/pii/0167668793909974.\n\nOtherwise, Knuth\'s algorithm is used to acquire samples via multiplying uniform\nrandom variables.\nSee Donald E. Knuth (1969). 
Seminumerical Algorithms. The Art of Computer\nProgramming, Volume 2. Addison Wesley" + is_stateful: true +} op { name: "RandomShuffle" input_arg { @@ -31758,40 +31869,14 @@ op { name: "Where" input_arg { name: "input" - type_attr: "T" + type: DT_BOOL } output_arg { name: "index" type: DT_INT64 } - attr { - name: "T" - type: "type" - default_value { - type: DT_BOOL - } - allowed_values { - list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT16 - type: DT_INT8 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 - type: DT_HALF - type: DT_BOOL - } - } - } - summary: "Returns locations of nonzero / true values in a tensor." - description: "This operation returns the coordinates of true elements in `input`. The\ncoordinates are returned in a 2-D tensor where the first dimension (rows)\nrepresents the number of true elements, and the second dimension (columns)\nrepresents the coordinates of the true elements. Keep in mind, the shape of\nthe output tensor can vary depending on how many true values there are in\n`input`. 
Indices are output in row-major order.\n\nFor example:\n\n```\n# \'input\' tensor is [[True, False]\n# [True, False]]\n# \'input\' has two true values, so output has two coordinates.\n# \'input\' has rank of 2, so coordinates have two indices.\nwhere(input) ==> [[0, 0],\n [1, 0]]\n\n# `input` tensor is [[[True, False]\n# [True, False]]\n# [[False, True]\n# [False, True]]\n# [[False, False]\n# [False, True]]]\n# \'input\' has 5 true values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n [0, 1, 0],\n [1, 0, 1],\n [1, 1, 1],\n [2, 1, 1]]\n\n# `input` tensor is [[[1.5, 0.0]\n# [-0.5, 0.0]]\n# [[0.0, 0.25]\n# [0.0, 0.75]]\n# [[0.0, 0.0]\n# [0.0, 0.01]]]\n# \'input\' has 5 nonzero values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n [0, 1, 0],\n [1, 0, 1],\n [1, 1, 1],\n [2, 1, 1]]\n\n# `input` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j]\n# [0.0 + 0.5j, 0.0 + 0.0j]]\n# [[0.0 + 0.0j, 0.25 + 1.5j]\n# [0.0 + 0.0j, 0.75 + 0.0j]]\n# [[0.0 + 0.0j, 0.0 + 0.0j]\n# [0.0 + 0.0j, 0.01 + 0.0j]]]\n# \'input\' has 5 nonzero magnitude values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n [0, 1, 0],\n [1, 0, 1],\n [1, 1, 1],\n [2, 1, 1]]\n```" + summary: "Returns locations of true values in a boolean tensor." + description: "This operation returns the coordinates of true elements in `input`. The\ncoordinates are returned in a 2-D tensor where the first dimension (rows)\nrepresents the number of true elements, and the second dimension (columns)\nrepresents the coordinates of the true elements. Keep in mind, the shape of\nthe output tensor can vary depending on how many true values there are in\n`input`. 
Indices are output in row-major order.\n\nFor example:\n\n```\n# \'input\' tensor is [[True, False]\n# [True, False]]\n# \'input\' has two true values, so output has two coordinates.\n# \'input\' has rank of 2, so coordinates have two indices.\nwhere(input) ==> [[0, 0],\n [1, 0]]\n\n# `input` tensor is [[[True, False]\n# [True, False]]\n# [[False, True]\n# [False, True]]\n# [[False, False]\n# [False, True]]]\n# \'input\' has 5 true values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n [0, 1, 0],\n [1, 0, 1],\n [1, 1, 1],\n [2, 1, 1]]\n```" } op { name: "WholeFileReader" -- GitLab From 55e765b578529364522b92d732d1240243412197 Mon Sep 17 00:00:00 2001 From: Ian Langmore Date: Wed, 4 Oct 2017 21:35:16 -0700 Subject: [PATCH 047/909] BUGFIX: AbsoluteValue.invert(y) raises if y < 0 and validate_args PiperOrigin-RevId: 171106639 --- .../bijectors/absolute_value_test.py | 12 ++++++++++ .../ops/bijectors/absolute_value_impl.py | 23 +++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/absolute_value_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/absolute_value_test.py index da50037d6e..e0d65c79b2 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/absolute_value_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/absolute_value_test.py @@ -68,6 +68,18 @@ class AbsoluteValueTest(test.TestCase): sess.run(abs_bijector.inverse_log_det_jacobian([1.]), feed_dict={event_ndims: 1}) + def testNegativeYRaisesForInverseIfValidateArgs(self): + with self.test_session() as sess: + bijector = AbsoluteValue(event_ndims=0, validate_args=True) + with self.assertRaisesOpError("y was negative"): + sess.run(bijector.inverse(-1.)) + + def testNegativeYRaisesForILDJIfValidateArgs(self): + with self.test_session() as sess: + bijector = 
AbsoluteValue(event_ndims=0, validate_args=True) + with self.assertRaisesOpError("y was negative"): + sess.run(bijector.inverse_log_det_jacobian(-1.)) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py index 065a049cf7..b84502003a 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/absolute_value_impl.py @@ -35,7 +35,17 @@ class AbsoluteValue(bijector.Bijector): """Computes `Y = g(X) = Abs(X)`, element-wise. This non-injective bijector allows for transformations of scalar distributions - with the absolute value function. + with the absolute value function, which maps `(-inf, inf)` to `[0, inf)`. + + * For `y in (0, inf)`, `AbsoluteValue.inverse(y)` returns the set inverse + `{x in (-inf, inf) : |x| = y}` as a tuple, `-y, y`. + * `AbsoluteValue.inverse(0)` returns `0, 0`, which is not the set inverse + (the set inverse is the singleton `{0}`), but "works" in conjunction with + `TransformedDistribution` to produce a left semi-continuous pdf. + * For `y < 0`, `AbsoluteValue.inverse(y)` happily returns the + wrong thing, `-y, y`. This is done for efficiency. If + `validate_args == True`, `y < 0` will raise an exception. + ```python abs = ds.bijectors.AbsoluteValue() @@ -68,7 +78,8 @@ class AbsoluteValue(bijector.Bijector): with a particular draw from the distribution. Currently only zero is supported. validate_args: Python `bool` indicating whether arguments should be - checked for correctness. + checked for correctness, in particular whether inputs to `inverse` and + `inverse_log_det_jacobian` are non-negative. name: Python `str` name given to ops managed by this object. 
Raises: @@ -98,6 +109,10 @@ class AbsoluteValue(bijector.Bijector): return math_ops.abs(x) def _inverse(self, y): + if self.validate_args: + y = control_flow_ops.with_dependencies( + [check_ops.assert_non_negative(y, message="Argument y was negative")], + y) return -y, y def _inverse_log_det_jacobian(self, y): @@ -106,6 +121,10 @@ class AbsoluteValue(bijector.Bijector): # so Log|DF^{-1}(y)| = Log[1, 1] = [0, 0]. batch_shape = array_ops.shape(y)[:array_ops.rank(y) - self.event_ndims] zeros = array_ops.zeros(batch_shape, dtype=y.dtype) + if self.validate_args: + zeros = control_flow_ops.with_dependencies( + [check_ops.assert_non_negative(y, message="Argument y was negative")], + zeros) return zeros, zeros @property -- GitLab From 07124fac0ec20e584d018035300d44ee55e451f0 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 4 Oct 2017 22:24:29 -0700 Subject: [PATCH 048/909] Fix build of dumped_computation_to_operation_list. CompileExecutable had its last arg removed. PiperOrigin-RevId: 171109500 --- .../xla/tools/dumped_computation_to_operation_list.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tools/dumped_computation_to_operation_list.cc b/tensorflow/compiler/xla/tools/dumped_computation_to_operation_list.cc index aa297ac171..5ede37b873 100644 --- a/tensorflow/compiler/xla/tools/dumped_computation_to_operation_list.cc +++ b/tensorflow/compiler/xla/tools/dumped_computation_to_operation_list.cc @@ -86,9 +86,9 @@ void RealMain(tensorflow::gtl::ArraySlice args) { layouts.push_back(&program_shape->parameters(i)); } StatusOr> executable = - local_service->CompileExecutable( - computation.handle(), layouts, &program_shape->result(), - /*device_ordinal=*/0, /*has_hybrid_result=*/true); + local_service->CompileExecutable(computation.handle(), layouts, + &program_shape->result(), + /*device_ordinal=*/0); const HloModule& module = executable.ValueOrDie()->module(); -- GitLab From cde6636b0130e639fcc3e157dc09aeb816a35e05 Mon 
Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 4 Oct 2017 22:33:17 -0700 Subject: [PATCH 049/909] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 171110005 --- tensorflow/go/op/wrappers.go | 1026 ++++++++++++++++++---------------- 1 file changed, 551 insertions(+), 475 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 09a509f21b..ef1f8a9df6 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -1412,7 +1412,7 @@ func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { return op.Output(0) } -// Returns locations of nonzero / true values in a tensor. +// Returns locations of true values in a boolean tensor. // // This operation returns the coordinates of true elements in `input`. The // coordinates are returned in a 2-D tensor where the first dimension (rows) @@ -1444,34 +1444,6 @@ func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { // [1, 0, 1], // [1, 1, 1], // [2, 1, 1]] -// -// # `input` tensor is [[[1.5, 0.0] -// # [-0.5, 0.0]] -// # [[0.0, 0.25] -// # [0.0, 0.75]] -// # [[0.0, 0.0] -// # [0.0, 0.01]]] -// # 'input' has 5 nonzero values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] -// -// # `input` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] -// # [0.0 + 0.5j, 0.0 + 0.0j]] -// # [[0.0 + 0.0j, 0.25 + 1.5j] -// # [0.0 + 0.0j, 0.75 + 0.0j]] -// # [[0.0 + 0.0j, 0.0 + 0.0j] -// # [0.0 + 0.0j, 0.01 + 0.0j]]] -// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. 
-// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] // ``` func Where(scope *Scope, input tf.Output) (index tf.Output) { if scope.Err() != nil { @@ -6994,194 +6966,6 @@ func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegS return op.Output(0) } -// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. -type AllCandidateSamplerAttr func(optionalAttr) - -// AllCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a learned unigram distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. -// -// Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to produce. 
-// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AllCandidateSampler", - Input: []tf.Input{ - true_classes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg. -type DecodeAndCropJpegAttr func(optionalAttr) - -// DecodeAndCropJpegChannels sets the optional channels attribute to value. -// -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodeAndCropJpegRatio sets the optional ratio attribute to value. -// -// value: Downscaling ratio. 
-// If not specified, defaults to 1 -func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["ratio"] = value - } -} - -// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. -// -// value: If true use a slower but nicer upscaling of the -// chroma planes (yuv420/422 only). -// If not specified, defaults to true -func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["fancy_upscaling"] = value - } -} - -// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. -// -// value: If true try to recover an image from truncated input. -// If not specified, defaults to false -func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["try_recover_truncated"] = value - } -} - -// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. -// -// value: The minimum required fraction of lines before a truncated -// input is accepted. -// If not specified, defaults to 1 -func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["acceptable_fraction"] = value - } -} - -// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value. -// -// value: string specifying a hint about the algorithm used for -// decompression. Defaults to "" which maps to a system-specific -// default. Currently valid values are ["INTEGER_FAST", -// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal -// jpeg library changes to a version that does not have that specific -// option.) -// If not specified, defaults to "" -func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["dct_method"] = value - } -} - -// Decode and Crop a JPEG-encoded image to a uint8 tensor. 
-// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the JPEG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// -// If needed, the JPEG-encoded image is transformed to match the requested number -// of color channels. -// -// The attr `ratio` allows downscaling the image by an integer factor during -// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than -// downscaling the image later. -// -// -// It is equivalent to a combination of decode and crop, but much faster by only -// decoding partial jpeg image. -// -// Arguments: -// contents: 0-D. The JPEG-encoded image. -// crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. -// -// Returns 3-D with shape `[height, width, channels]`.. -func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeAndCropJpeg", - Input: []tf.Input{ - contents, crop_window, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // DecodeJpegAttr is an optional argument to DecodeJpeg. type DecodeJpegAttr func(optionalAttr) @@ -11179,6 +10963,37 @@ func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_ return op.Output(0), op.Output(1), op.Output(2) } +// Computes the sign and the log of the absolute value of the determinant of +// +// one or more square matrices. +// +// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions +// form square matrices. 
The outputs are two tensors containing the signs and +// absolute values of the log determinants for all N input submatrices +// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). +// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU +// is the LU decomposition of the input and P is the corresponding +// permutation matrix. +// +// Arguments: +// input: Shape is `[N, M, M]`. +// +// Returns The signs of the log determinants of the inputs. Shape is `[N]`.The logs of the absolute values of the determinants +// of the N input matrices. Shape is `[N]`. +func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LogMatrixDeterminant", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + // SetSizeAttr is an optional argument to SetSize. type SetSizeAttr func(optionalAttr) @@ -11590,28 +11405,400 @@ func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, return op.Output(0), op.Output(1) } -// Computes the gradient of the sigmoid of `x` wrt its input. +// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. +type AllCandidateSamplerAttr func(optionalAttr) + +// AllCandidateSamplerSeed sets the optional seed attribute to value. // -// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and -// `dy` is the corresponding input gradient. -func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. 
+// If not specified, defaults to 0 +func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed"] = value } - opspec := tf.OpSpec{ - Type: "SigmoidGrad", - Input: []tf.Input{ - y, dy, - }, +} + +// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed2"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Subtracts a value from the current value of a variable. +// Generates labels for candidate sampling with a learned unigram distribution. // -// Any ReadVariableOp which depends directly or indirectly on this assign is -// guaranteed to see the incremented value or a subsequent newer one. +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. +// +// For each batch, this op picks a single set of sampled candidate labels. +// +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. +// +// Arguments: +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to produce. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. 
+// +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. +func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AllCandidateSampler", + Input: []tf.Input{ + true_classes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg. +type DecodeAndCropJpegAttr func(optionalAttr) + +// DecodeAndCropJpegChannels sets the optional channels attribute to value. +// +// value: Number of color channels for the decoded image. +// If not specified, defaults to 0 +func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["channels"] = value + } +} + +// DecodeAndCropJpegRatio sets the optional ratio attribute to value. +// +// value: Downscaling ratio. +// If not specified, defaults to 1 +func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["ratio"] = value + } +} + +// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. 
+// +// value: If true use a slower but nicer upscaling of the +// chroma planes (yuv420/422 only). +// If not specified, defaults to true +func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["fancy_upscaling"] = value + } +} + +// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. +// +// value: If true try to recover an image from truncated input. +// If not specified, defaults to false +func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["try_recover_truncated"] = value + } +} + +// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. +// +// value: The minimum required fraction of lines before a truncated +// input is accepted. +// If not specified, defaults to 1 +func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["acceptable_fraction"] = value + } +} + +// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value. +// +// value: string specifying a hint about the algorithm used for +// decompression. Defaults to "" which maps to a system-specific +// default. Currently valid values are ["INTEGER_FAST", +// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal +// jpeg library changes to a version that does not have that specific +// option.) +// If not specified, defaults to "" +func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["dct_method"] = value + } +} + +// Decode and Crop a JPEG-encoded image to a uint8 tensor. +// +// The attr `channels` indicates the desired number of color channels for the +// decoded image. +// +// Accepted values are: +// +// * 0: Use the number of channels in the JPEG-encoded image. +// * 1: output a grayscale image. +// * 3: output an RGB image. 
+// +// If needed, the JPEG-encoded image is transformed to match the requested number +// of color channels. +// +// The attr `ratio` allows downscaling the image by an integer factor during +// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than +// downscaling the image later. +// +// +// It is equivalent to a combination of decode and crop, but much faster by only +// decoding partial jpeg image. +// +// Arguments: +// contents: 0-D. The JPEG-encoded image. +// crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. +// +// Returns 3-D with shape `[height, width, channels]`.. +func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DecodeAndCropJpeg", + Input: []tf.Input{ + contents, crop_window, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// RandomPoissonV2Attr is an optional argument to RandomPoissonV2. +type RandomPoissonV2Attr func(optionalAttr) + +// RandomPoissonV2Seed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomPoissonV2Seed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// RandomPoissonV2Dtype sets the optional dtype attribute to value. 
+// If not specified, defaults to DT_INT64 +func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs random values from the Poisson distribution(s) described by rate. +// +// This op uses two algorithms, depending on rate. If rate >= 10, then +// the algorithm by Hormann is used to acquire samples via +// transformation-rejection. +// See http://www.sciencedirect.com/science/article/pii/0167668793909974. +// +// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform +// random variables. +// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer +// Programming, Volume 2. Addison Wesley +// +// Arguments: +// shape: 1-D integer tensor. Shape of independent samples to draw from each +// distribution described by the shape parameters given in rate. +// rate: A tensor in which each scalar is a "rate" parameter describing the +// associated poisson distribution. +// +// Returns A tensor with shape `shape + shape(rate)`. Each slice +// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for +// `rate[i0, i1, ...iN]`. +func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomPoissonV2", + Input: []tf.Input{ + shape, rate, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// OrderedMapPeekAttr is an optional argument to OrderedMapPeek. +type OrderedMapPeekAttr func(optionalAttr) + +// OrderedMapPeekCapacity sets the optional capacity attribute to value. 
+// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapPeekCapacity(value int64) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// OrderedMapPeekMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapPeekMemoryLimit(value int64) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapPeekContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func OrderedMapPeekContainer(value string) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapPeekSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func OrderedMapPeekSharedName(value string) OrderedMapPeekAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op peeks at the values at the specified key. If the +// +// underlying container does not contain this key +// this op will block until it does. This Op is optimized for +// performance. +func OrderedMapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapPeekAttr) (values []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "OrderedMapPeek", + Input: []tf.Input{ + key, indices, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("OrderedMapPeek", err) + return + } + return values +} + +// Adds two `SparseTensor` objects to produce another `SparseTensor`. +// +// The input `SparseTensor` objects' indices are assumed ordered in standard +// lexicographic order. 
If this is not the case, before this step run +// `SparseReorder` to restore index ordering. +// +// By default, if two values sum to zero at some index, the output `SparseTensor` +// would still include that particular location in its index, storing a zero in the +// corresponding value slot. To override this, callers can specify `thresh`, +// indicating that if the sum has a magnitude strictly smaller than `thresh`, its +// corresponding value and index would then not be included. In particular, +// `thresh == 0` (default) means everything is kept and actual thresholding happens +// only for a positive value. +// +// In the following shapes, `nnz` is the count after taking `thresh` into account. +// +// Arguments: +// a_indices: 2-D. The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. +// a_values: 1-D. The `values` of the first `SparseTensor`, size `[nnz]` Vector. +// a_shape: 1-D. The `shape` of the first `SparseTensor`, size `[ndims]` Vector. +// b_indices: 2-D. The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix. +// b_values: 1-D. The `values` of the second `SparseTensor`, size `[nnz]` Vector. +// b_shape: 1-D. The `shape` of the second `SparseTensor`, size `[ndims]` Vector. +// thresh: 0-D. The magnitude threshold that determines if an output value/index +// pair takes space. +func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseAdd", + Input: []tf.Input{ + a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Computes the gradient of the sigmoid of `x` wrt its input. 
+// +// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and +// `dy` is the corresponding input gradient. +func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SigmoidGrad", + Input: []tf.Input{ + y, dy, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Subtracts a value from the current value of a variable. +// +// Any ReadVariableOp which depends directly or indirectly on this assign is +// guaranteed to see the incremented value or a subsequent newer one. // // Outputs the incremented value, which can be used to totally order the // increments to this variable. @@ -16263,80 +16450,6 @@ func BitwiseXor(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } -// Compute the polygamma function \\(\psi^{(n)}(x)\\). -// -// The polygamma function is defined as: -// -// -// \\(\psi^{(n)}(x) = \frac{d^n}{dx^n} \psi(x)\\) -// -// where \\(\psi(x)\\) is the digamma function. -func Polygamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Polygamma", - Input: []tf.Input{ - a, x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the grayscale dilation of 4-D `input` and 3-D `filter` tensors. -// -// The `input` tensor has shape `[batch, in_height, in_width, depth]` and the -// `filter` tensor has shape `[filter_height, filter_width, depth]`, i.e., each -// input channel is processed independently of the others with its own structuring -// function. The `output` tensor has shape -// `[batch, out_height, out_width, depth]`. The spatial dimensions of the output -// tensor depend on the `padding` algorithm. We currently only support the default -// "NHWC" `data_format`. 
-// -// In detail, the grayscale morphological 2-D dilation is the max-sum correlation -// (for consistency with `conv2d`, we use unmirrored filters): -// -// output[b, y, x, c] = -// max_{dy, dx} input[b, -// strides[1] * y + rates[1] * dy, -// strides[2] * x + rates[2] * dx, -// c] + -// filter[dy, dx, c] -// -// Max-pooling is a special case when the filter has size equal to the pooling -// kernel size and contains all zeros. -// -// Note on duality: The dilation of `input` by the `filter` is equal to the -// negation of the erosion of `-input` by the reflected `filter`. -// -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, depth]`. -// filter: 3-D with shape `[filter_height, filter_width, depth]`. -// strides: The stride of the sliding window for each dimension of the input -// tensor. Must be: `[1, stride_height, stride_width, 1]`. -// rates: The input stride for atrous morphological dilation. Must be: -// `[1, rate_height, rate_width, 1]`. -// padding: The type of padding algorithm to use. -// -// Returns 4-D with shape `[batch, out_height, out_width, depth]`. -func Dilation2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, rates []int64, padding string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} - opspec := tf.OpSpec{ - Type: "Dilation2D", - Input: []tf.Input{ - input, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Creates a summary file writer accessible by the given resource handle. // // Arguments: @@ -16697,31 +16810,71 @@ func RealTout(value tf.DataType) RealAttr { // Returns the real part of a complex number. // -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the real part of each element in `input`. 
All elements in -// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real -// part returned by this operation and *b* is the imaginary part. +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the real part of each element in `input`. All elements in +// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real +// part returned by this operation and *b* is the imaginary part. +// +// For example: +// +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.real(input) ==> [-2.25, 3.25] +// ``` +func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Real", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// 2D real-valued fast Fourier transform. +// +// Computes the 2-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most 2 dimensions of `input`. +// +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the +// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension +// of `output`: the zero-frequency term, followed by the `fft_length / 2` +// positive-frequency terms. +// +// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. +// +// Arguments: +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. // -// For example: +// Returns A complex64 tensor of the same rank as `input`. The inner-most 2 +// dimensions of `input` are replaced with their 2D Fourier transform. 
The +// inner-most dimension contains `fft_length / 2 + 1` unique frequency +// components. // -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.real(input) ==> [-2.25, 3.25] -// ``` -func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) { +// @compatibility(numpy) +// Equivalent to np.fft.rfft2 +// @end_compatibility +func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Real", + Type: "RFFT2D", Input: []tf.Input{ - input, + input, fft_length, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -17139,117 +17292,6 @@ func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksi return op.Output(0) } -// OrderedMapPeekAttr is an optional argument to OrderedMapPeek. -type OrderedMapPeekAttr func(optionalAttr) - -// OrderedMapPeekCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapPeekCapacity(value int64) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapPeekMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapPeekMemoryLimit(value int64) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapPeekContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapPeekContainer(value string) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapPeekSharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func OrderedMapPeekSharedName(value string) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op peeks at the values at the specified key. If the -// -// underlying container does not contain this key -// this op will block until it does. This Op is optimized for -// performance. -func OrderedMapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapPeekAttr) (values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OrderedMapPeek", - Input: []tf.Input{ - key, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("OrderedMapPeek", err) - return - } - return values -} - -// Adds two `SparseTensor` objects to produce another `SparseTensor`. -// -// The input `SparseTensor` objects' indices are assumed ordered in standard -// lexicographic order. If this is not the case, before this step run -// `SparseReorder` to restore index ordering. -// -// By default, if two values sum to zero at some index, the output `SparseTensor` -// would still include that particular location in its index, storing a zero in the -// corresponding value slot. To override this, callers can specify `thresh`, -// indicating that if the sum has a magnitude strictly smaller than `thresh`, its -// corresponding value and index would then not be included. In particular, -// `thresh == 0` (default) means everything is kept and actual thresholding happens -// only for a positive value. -// -// In the following shapes, `nnz` is the count after taking `thresh` into account. -// -// Arguments: -// a_indices: 2-D. 
The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. -// a_values: 1-D. The `values` of the first `SparseTensor`, size `[nnz]` Vector. -// a_shape: 1-D. The `shape` of the first `SparseTensor`, size `[ndims]` Vector. -// b_indices: 2-D. The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix. -// b_values: 1-D. The `values` of the second `SparseTensor`, size `[nnz]` Vector. -// b_shape: 1-D. The `shape` of the second `SparseTensor`, size `[ndims]` Vector. -// thresh: 0-D. The magnitude threshold that determines if an output value/index -// pair takes space. -func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseAdd", - Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // Creates a dataset that batches `batch_size` elements from `input_dataset`. // // Arguments: @@ -20337,6 +20379,80 @@ func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } +// Compute the polygamma function \\(\psi^{(n)}(x)\\). +// +// The polygamma function is defined as: +// +// +// \\(\psi^{(n)}(x) = \frac{d^n}{dx^n} \psi(x)\\) +// +// where \\(\psi(x)\\) is the digamma function. +func Polygamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Polygamma", + Input: []tf.Input{ + a, x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the grayscale dilation of 4-D `input` and 3-D `filter` tensors. 
+// +// The `input` tensor has shape `[batch, in_height, in_width, depth]` and the +// `filter` tensor has shape `[filter_height, filter_width, depth]`, i.e., each +// input channel is processed independently of the others with its own structuring +// function. The `output` tensor has shape +// `[batch, out_height, out_width, depth]`. The spatial dimensions of the output +// tensor depend on the `padding` algorithm. We currently only support the default +// "NHWC" `data_format`. +// +// In detail, the grayscale morphological 2-D dilation is the max-sum correlation +// (for consistency with `conv2d`, we use unmirrored filters): +// +// output[b, y, x, c] = +// max_{dy, dx} input[b, +// strides[1] * y + rates[1] * dy, +// strides[2] * x + rates[2] * dx, +// c] + +// filter[dy, dx, c] +// +// Max-pooling is a special case when the filter has size equal to the pooling +// kernel size and contains all zeros. +// +// Note on duality: The dilation of `input` by the `filter` is equal to the +// negation of the erosion of `-input` by the reflected `filter`. +// +// Arguments: +// input: 4-D with shape `[batch, in_height, in_width, depth]`. +// filter: 3-D with shape `[filter_height, filter_width, depth]`. +// strides: The stride of the sliding window for each dimension of the input +// tensor. Must be: `[1, stride_height, stride_width, 1]`. +// rates: The input stride for atrous morphological dilation. Must be: +// `[1, rate_height, rate_width, 1]`. +// padding: The type of padding algorithm to use. +// +// Returns 4-D with shape `[batch, out_height, out_width, depth]`. 
+func Dilation2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, rates []int64, padding string) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} + opspec := tf.OpSpec{ + Type: "Dilation2D", + Input: []tf.Input{ + input, filter, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // AudioSpectrogramAttr is an optional argument to AudioSpectrogram. type AudioSpectrogramAttr func(optionalAttr) @@ -23117,46 +23233,6 @@ func Erfc(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// 2D real-valued fast Fourier transform. -// -// Computes the 2-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most 2 dimensions of `input`. -// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the -// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension -// of `output`: the zero-frequency term, followed by the `fft_length / 2` -// positive-frequency terms. -// -// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. -// -// Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. -// -// Returns A complex64 tensor of the same rank as `input`. The inner-most 2 -// dimensions of `input` are replaced with their 2D Fourier transform. The -// inner-most dimension contains `fft_length / 2 + 1` unique frequency -// components. 
-// -// @compatibility(numpy) -// Equivalent to np.fft.rfft2 -// @end_compatibility -func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RFFT2D", - Input: []tf.Input{ - input, fft_length, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes sin of x element-wise. func Sin(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { -- GitLab From f6b15b08bbedc500549b0793b236bc90289d07dc Mon Sep 17 00:00:00 2001 From: RJ Ryan Date: Wed, 4 Oct 2017 23:33:04 -0700 Subject: [PATCH 050/909] Update the tf.contrib.signal guide to include guidance on computing Mel spectrograms and MFCCs. PiperOrigin-RevId: 171113759 --- .../api_guides/python/contrib.signal.md | 127 +++++++++++++----- 1 file changed, 93 insertions(+), 34 deletions(-) diff --git a/tensorflow/docs_src/api_guides/python/contrib.signal.md b/tensorflow/docs_src/api_guides/python/contrib.signal.md index c16c5cb649..85ef3ad134 100644 --- a/tensorflow/docs_src/api_guides/python/contrib.signal.md +++ b/tensorflow/docs_src/api_guides/python/contrib.signal.md @@ -1,16 +1,17 @@ # Signal Processing (contrib) [TOC] -@{tf.contrib.signal} is a module for signal processing primitives. All -operations have GPU support and are differentiable. +@{tf.contrib.signal} is a module for signal processing primitives. All +operations have GPU support and are differentiable. This module is especially +helpful for building TensorFlow models that process or generate audio, though +the techniques are useful in many domains. -# Common Tasks +## Framing variable length sequences -## Framing variable length sequences: - -When dealing with variable length signals (e.g. audio) it is common to -"frame" them into multiple fixed length, potentially overlapping windows. -@{tf.contrib.signal.frame} does exactly this. For example: +When dealing with variable length signals (e.g. 
audio) it is common to "frame" +them into multiple fixed length windows. These windows can overlap if the 'step' +of the frame is less than the frame length. @{tf.contrib.signal.frame} does +exactly this. For example: ```python # A batch of float32 time-domain signals in the range [-1, 1] with shape @@ -18,8 +19,9 @@ When dealing with variable length signals (e.g. audio) it is common to signals = tf.placeholder(tf.float32, [None, None]) # Compute a [batch_size, ?, 128] tensor of fixed length, overlapping windows -# where each window overlaps the previous by 50%. -frames = tf.contrib.signal.frame(signals, frame_length=128, frame_step=64) +# where each window overlaps the previous by 75% (frame_length - frame_step +# samples of overlap). +frames = tf.contrib.signal.frame(signals, frame_length=128, frame_step=32) ``` The `axis` parameter to @{tf.contrib.signal.frame} allows you to frame tensors @@ -27,54 +29,52 @@ with inner structure (e.g. a spectrogram): ```python # `magnitude_spectrograms` is a [batch_size, ?, 127] tensor of spectrograms. We -# would like to produce overlapping fixed-size spectrogram patches e.g. for use -# in a situation where a fixed size input is needed. +# would like to produce overlapping fixed-size spectrogram patches; for example, +# for use in a situation where a fixed size input is needed. magnitude_spectrograms = tf.abs(tf.contrib.signal.stft( - signals, frame_length=256, frame_step=128, fft_length=256)) + signals, frame_length=256, frame_step=64, fft_length=256)) -# `spectrogram_patches` is a [batch_size, ?, 64, 127] tensor containing a +# `spectrogram_patches` is a [batch_size, ?, 64, 127] tensor containing a # variable number of [64, 127] spectrogram patches per batch item. 
spectrogram_patches = tf.contrib.signal.frame( - magnitude_spectrograms, frame_length=64, frame_step=32, axis=1) + magnitude_spectrograms, frame_length=64, frame_step=16, axis=1) ``` -## Reconstructing framed sequences and applying a tapering window: +## Reconstructing framed sequences and applying a tapering window @{tf.contrib.signal.overlap_and_add} can be used to reconstruct a signal from a -framed representation produced in the above example. +framed representation. For example, the following code reconstructs the signal +produced in the preceding example: ```python # Reconstructs `signals` from `frames` produced in the above example. However, # the magnitude of `reconstructed_signals` will be greater than `signals`. -reconstructed_signals = tf.contrib.signal.overlap_and_add(frames, frame_step=64) +reconstructed_signals = tf.contrib.signal.overlap_and_add(frames, frame_step=32) ``` -Note that because `frame_step` is 50% of `frame_length` in the above example, +Note that because `frame_step` is 25% of `frame_length` in the above example, the resulting reconstruction will have a greater magnitude than the original -`signals`. - -To compensate for this, we can use a tapering window function. If the +`signals`. To compensate for this, we can use a tapering window function. If the window function satisfies the Constant Overlap-Add (COLA) property for the given frame step, then it will recover the original `signals`. @{tf.contrib.signal.hamming_window} and @{tf.contrib.signal.hann_window} both -satisfy the COLA property for a 50% overlap. +satisfy the COLA property for a 75% overlap. ```python frame_length = 128 -frame_step = 64 +frame_step = 32 windowed_frames = frames * tf.contrib.signal.hann_window(frame_length) reconstructed_signals = tf.contrib.signal.overlap_and_add( windowed_frames, frame_step) ``` -## Computing spectrograms: +## Computing spectrograms A spectrogram is a time-frequency decomposition of a signal that indicates its -frequency content over time. 
There are many variants on how to compute a -spectrogram, but the most common approach is by taking the magnitude of the -[Short-time Fourier Transform][stft] (STFT), which can be computed with -@{tf.contrib.signal.stft}. +frequency content over time. The most common approach to computing spectrograms +is to take the magnitude of the [Short-time Fourier Transform][stft] (STFT), +which @{tf.contrib.signal.stft} can compute as follows: ```python # A batch of float32 time-domain signals in the range [-1, 1] with shape @@ -82,7 +82,7 @@ spectrogram, but the most common approach is by taking the magnitude of the signals = tf.placeholder(tf.float32, [None, None]) # `stfts` is a complex64 Tensor representing the Short-time Fourier Transform of -# each signal in `signals`. Its shape is [batch_size, ?, fft_unique_bins] +# each signal in `signals`. Its shape is [batch_size, ?, fft_unique_bins] # where fft_unique_bins = fft_length // 2 + 1 = 513. stfts = tf.contrib.signal.stft(signals, frame_length=1024, frame_step=512, fft_length=1024) @@ -96,18 +96,77 @@ power_spectrograms = tf.real(stfts * tf.conj(stfts)) magnitude_spectrograms = tf.abs(stfts) ``` -## Logarithmic compression: +You may use a power spectrogram or a magnitude spectrogram; each has its +advantages. Note that if you apply logarithmic compression, the power +spectrogram and magnitude spectrogram will differ by a factor of 2. + +## Logarithmic compression It is common practice to apply a compressive nonlinearity such as a logarithm or -power-law compression to spectrograms. +power-law compression to spectrograms. This helps to balance the importance of +detail in low and high energy regions of the spectrum, which more closely +matches human auditory sensitivity. -When compressing with a logarithm, it's a good idea to use a stabilizing offset +When compressing with a logarithm, it's a good idea to use a stabilizing offset to avoid high dynamic ranges caused by the singularity at zero. 
```python log_offset = 1e-6 log_magnitude_spectrograms = tf.log(magnitude_spectrograms + log_offset) -log_power_spectrograms = tf.log(power_spectrograms + log_offset) +``` + +## Computing log-mel spectrograms + +When working with spectral representations of audio, the [mel scale][mel] is a +common reweighting of the frequency dimension, which results in a +lower-dimensional and more perceptually-relevant representation of the audio. + +@{tf.contrib.signal.linear_to_mel_weight_matrix} produces a matrix you can use +to convert a spectrogram to the mel scale. + +```python +# Warp the linear-scale, magnitude spectrograms into the mel-scale. +num_spectrogram_bins = magnitude_spectrograms.shape[-1].value +lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 64 +linear_to_mel_weight_matrix = tf.contrib.signal.linear_to_mel_weight_matrix( + num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz, + upper_edge_hertz) +mel_spectrograms = tf.tensordot( + magnitude_spectrograms, linear_to_mel_weight_matrix, 1) +# Note: Shape inference for `tf.tensordot` does not currently handle this case. +mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate( + linear_to_mel_weight_matrix.shape[-1:])) +``` + +If desired, compress the mel spectrogram magnitudes. For example, you may use +logarithmic compression (as discussed in the previous section). + +Order matters! Compressing the spectrogram magnitudes after +reweighting the frequencies is different from reweighting the compressed +spectrogram magnitudes. According to the perceptual justification of the mel +scale, conversion from linear scale entails summing intensity or energy among +adjacent bands, i.e. it should be applied before logarithmic compression. Taking +the weighted sum of log-compressed values amounts to multiplying the +pre-logarithm values, which rarely, if ever, makes sense. 
+ +```python +log_offset = 1e-6 +log_mel_spectrograms = tf.log(mel_spectrograms + log_offset) +``` + +## Computing Mel-Frequency Cepstral Coefficients (MFCCs) + +Call @{tf.contrib.signal.mfccs_from_log_mel_spectrograms} to compute +[MFCCs][mfcc] from log-magnitude, mel-scale spectrograms (as computed in the +preceding example): + +```python +num_mfccs = 13 +# Keep the first `num_mfccs` MFCCs. +mfccs = tf.contrib.signal.mfccs_from_log_mel_spectrograms( + log_mel_spectrograms)[..., :num_mfccs] ``` [stft]: https://en.wikipedia.org/wiki/Short-time_Fourier_transform +[mel]: https://en.wikipedia.org/wiki/Mel_scale +[mfcc]: https://en.wikipedia.org/wiki/Mel-frequency_cepstrum -- GitLab From 220515bffdf1df5379a7f8921f5a12deb2e0dee7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 5 Oct 2017 03:46:13 -0700 Subject: [PATCH 051/909] Replace owning raw pointers with unique pointers PiperOrigin-RevId: 171132628 --- tensorflow/c/checkpoint_reader.cc | 26 ++++++++++---------------- tensorflow/c/checkpoint_reader.h | 15 ++++++++------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/tensorflow/c/checkpoint_reader.cc b/tensorflow/c/checkpoint_reader.cc index e7b9bca5b5..fc86e92f3b 100644 --- a/tensorflow/c/checkpoint_reader.cc +++ b/tensorflow/c/checkpoint_reader.cc @@ -24,7 +24,6 @@ limitations under the License. 
#include "tensorflow/core/util/saved_tensor_slice_util.h" namespace tensorflow { - namespace checkpoint { class TensorSliceReader; @@ -37,30 +36,24 @@ CheckpointReader::CheckpointReader(const string& filename, std::vector v2_path; if (Env::Default()->GetMatchingPaths(MetaFilename(filename), &v2_path).ok() && !v2_path.empty()) { - v2_reader_ = - new BundleReader(Env::Default(), filename /* prefix to a V2 ckpt */); + v2_reader_.reset( + new BundleReader(Env::Default(), filename /* prefix to a V2 ckpt */)); if (!v2_reader_->status().ok()) { Set_TF_Status_from_Status(out_status, v2_reader_->status()); return; } var_to_shape_map_ptr_ = BuildV2VarToShapeMap(); } else { - reader_ = new TensorSliceReader(filename); + reader_.reset(new TensorSliceReader(filename)); if (!reader_->status().ok()) { Set_TF_Status_from_Status(out_status, reader_->status()); return; } - var_to_shape_map_ptr_ = - new TensorSliceReader::VarToShapeMap(reader_->GetVariableToShapeMap()); + var_to_shape_map_ptr_.reset( + new TensorSliceReader::VarToShapeMap(reader_->GetVariableToShapeMap())); } } -CheckpointReader::~CheckpointReader() { - delete var_to_shape_map_ptr_; - delete reader_; - delete v2_reader_; -} - bool CheckpointReader::HasTensor(const string& name) const { if (reader_ != nullptr) { return reader_->HasTensor(name, nullptr, nullptr); @@ -100,7 +93,8 @@ void CheckpointReader::GetTensor( } } -TensorSliceReader::VarToShapeMap* CheckpointReader::BuildV2VarToShapeMap() { +std::unique_ptr +CheckpointReader::BuildV2VarToShapeMap() { CHECK(v2_reader_ != nullptr); CHECK(v2_reader_->status().ok()); @@ -123,8 +117,8 @@ TensorSliceReader::VarToShapeMap* CheckpointReader::BuildV2VarToShapeMap() { } // Second pass: adds the entries, ignoring the filtered keys. 
- TensorSliceReader::VarToShapeMap* var_to_shape_map = - new TensorSliceReader::VarToShapeMap; + std::unique_ptr var_to_shape_map( + new TensorSliceReader::VarToShapeMap); v2_reader_->Seek(kHeaderEntryKey); for (v2_reader_->Next(); v2_reader_->Valid(); v2_reader_->Next()) { if (filtered_keys.count(v2_reader_->key().ToString()) > 0) continue; @@ -134,7 +128,7 @@ TensorSliceReader::VarToShapeMap* CheckpointReader::BuildV2VarToShapeMap() { (*var_to_shape_map)[v2_reader_->key().ToString()] = TensorShape(entry.shape()); } - return var_to_shape_map; // Owned by caller. + return var_to_shape_map; } } // namespace checkpoint diff --git a/tensorflow/c/checkpoint_reader.h b/tensorflow/c/checkpoint_reader.h index 1124416380..470c8d1e10 100644 --- a/tensorflow/c/checkpoint_reader.h +++ b/tensorflow/c/checkpoint_reader.h @@ -16,6 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_C_CHECKPOINT_READER_H #define TENSORFLOW_C_CHECKPOINT_READER_H +#include +#include + #include "tensorflow/c/tf_status_helper.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/status.h" @@ -24,7 +27,6 @@ limitations under the License. #include "tensorflow/core/util/tensor_slice_reader.h" namespace tensorflow { - namespace checkpoint { class TensorSliceReader; @@ -38,7 +40,6 @@ class TensorSliceReader; class CheckpointReader { public: CheckpointReader(const string& filepattern, TF_Status* out_status); - ~CheckpointReader(); bool HasTensor(const string& name) const; const string DebugString() const; @@ -56,12 +57,12 @@ class CheckpointReader { private: // Uses "v2_reader_" to build a "var name -> shape" map; owned by caller. // REQUIRES: "v2_reader_ != nullptr && v2_reader_.status().ok()". - TensorSliceReader::VarToShapeMap* BuildV2VarToShapeMap(); + std::unique_ptr BuildV2VarToShapeMap(); - // Invariant: exactly one of "reader_" and "v2_reader_" is non-nullptr. - TensorSliceReader* reader_; // Owned. - BundleReader* v2_reader_; // Owned. 
- TensorSliceReader::VarToShapeMap* var_to_shape_map_ptr_; // Owned. + // Invariant: exactly one of "reader_" and "v2_reader_" is non-null. + std::unique_ptr reader_; + std::unique_ptr v2_reader_; + std::unique_ptr var_to_shape_map_ptr_; TF_DISALLOW_COPY_AND_ASSIGN(CheckpointReader); }; -- GitLab From a8c5d5fe011e796593d20c74d8b927c014a27c89 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 5 Oct 2017 06:57:46 -0700 Subject: [PATCH 052/909] Expose data type information in checkpoint reader. PiperOrigin-RevId: 171147196 --- tensorflow/c/checkpoint_reader.cc | 40 ++++++++++++++----- tensorflow/c/checkpoint_reader.h | 17 ++++++-- tensorflow/core/util/tensor_slice_reader.cc | 13 +++++- tensorflow/core/util/tensor_slice_reader.h | 5 +++ tensorflow/python/util/py_checkpoint_reader.i | 38 ++++++++++++++++++ 5 files changed, 98 insertions(+), 15 deletions(-) diff --git a/tensorflow/c/checkpoint_reader.cc b/tensorflow/c/checkpoint_reader.cc index fc86e92f3b..b1f7bdaa54 100644 --- a/tensorflow/c/checkpoint_reader.cc +++ b/tensorflow/c/checkpoint_reader.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/c/checkpoint_reader.h" #include +#include #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -30,7 +31,10 @@ class TensorSliceReader; CheckpointReader::CheckpointReader(const string& filename, TF_Status* out_status) - : reader_(nullptr), v2_reader_(nullptr), var_to_shape_map_ptr_(nullptr) { + : reader_(nullptr), + v2_reader_(nullptr), + var_to_shape_map_(nullptr), + var_to_data_type_map_(nullptr) { // Depending on whether this is a V2 ckpt, initializes "reader_" or // "v2_reader_". 
std::vector v2_path; @@ -42,15 +46,19 @@ CheckpointReader::CheckpointReader(const string& filename, Set_TF_Status_from_Status(out_status, v2_reader_->status()); return; } - var_to_shape_map_ptr_ = BuildV2VarToShapeMap(); + auto result = BuildV2VarMaps(); + var_to_shape_map_.swap(result.first); + var_to_data_type_map_.swap(result.second); } else { reader_.reset(new TensorSliceReader(filename)); if (!reader_->status().ok()) { Set_TF_Status_from_Status(out_status, reader_->status()); return; } - var_to_shape_map_ptr_.reset( + var_to_shape_map_.reset( new TensorSliceReader::VarToShapeMap(reader_->GetVariableToShapeMap())); + var_to_data_type_map_.reset(new TensorSliceReader::VarToDataTypeMap( + reader_->GetVariableToDataTypeMap())); } } @@ -63,8 +71,14 @@ bool CheckpointReader::HasTensor(const string& name) const { const TensorSliceReader::VarToShapeMap& CheckpointReader::GetVariableToShapeMap() const { - CHECK(var_to_shape_map_ptr_); - return *var_to_shape_map_ptr_; + CHECK(var_to_shape_map_); + return *var_to_shape_map_; +} + +const TensorSliceReader::VarToDataTypeMap& +CheckpointReader::GetVariableToDataTypeMap() const { + CHECK(var_to_data_type_map_); + return *var_to_data_type_map_; } const string CheckpointReader::DebugString() const { @@ -93,8 +107,9 @@ void CheckpointReader::GetTensor( } } -std::unique_ptr -CheckpointReader::BuildV2VarToShapeMap() { +std::pair, + std::unique_ptr> +CheckpointReader::BuildV2VarMaps() { CHECK(v2_reader_ != nullptr); CHECK(v2_reader_->status().ok()); @@ -119,16 +134,21 @@ CheckpointReader::BuildV2VarToShapeMap() { // Second pass: adds the entries, ignoring the filtered keys. 
std::unique_ptr var_to_shape_map( new TensorSliceReader::VarToShapeMap); + std::unique_ptr var_to_data_type_map( + new TensorSliceReader::VarToDataTypeMap); v2_reader_->Seek(kHeaderEntryKey); for (v2_reader_->Next(); v2_reader_->Valid(); v2_reader_->Next()) { if (filtered_keys.count(v2_reader_->key().ToString()) > 0) continue; CHECK(entry.ParseFromArray(v2_reader_->value().data(), v2_reader_->value().size())) << entry.InitializationErrorString(); - (*var_to_shape_map)[v2_reader_->key().ToString()] = - TensorShape(entry.shape()); + string key = v2_reader_->key().ToString(); + (*var_to_shape_map)[key] = TensorShape(entry.shape()); + (*var_to_data_type_map)[key] = DataType(entry.dtype()); } - return var_to_shape_map; + // The returned pointers are owned by the caller. + return std::make_pair(std::move(var_to_shape_map), + std::move(var_to_data_type_map)); } } // namespace checkpoint diff --git a/tensorflow/c/checkpoint_reader.h b/tensorflow/c/checkpoint_reader.h index 470c8d1e10..4de1300a7f 100644 --- a/tensorflow/c/checkpoint_reader.h +++ b/tensorflow/c/checkpoint_reader.h @@ -44,10 +44,14 @@ class CheckpointReader { bool HasTensor(const string& name) const; const string DebugString() const; - // Returns a map from variable names to its shape. Slices of a partitioned + // Returns a map from variable names to their shapes. Slices of a partitioned // tensor are combined into a single entry. const TensorSliceReader::VarToShapeMap& GetVariableToShapeMap() const; + // Returns a map from variable names to their data types. Slices of a + // partitioned tensor are combined into a single entry. + const TensorSliceReader::VarToDataTypeMap& GetVariableToDataTypeMap() const; + // Attempts to look up the tensor named "name" and stores the found result in // "out_tensor". void GetTensor(const string& name, @@ -55,14 +59,19 @@ class CheckpointReader { TF_Status* out_status) const; private: - // Uses "v2_reader_" to build a "var name -> shape" map; owned by caller. 
+ // Uses "v2_reader_" to build "var name -> shape" and "var name -> data type" + // maps; both owned by caller. // REQUIRES: "v2_reader_ != nullptr && v2_reader_.status().ok()". - std::unique_ptr BuildV2VarToShapeMap(); + std::pair, + std::unique_ptr > + BuildV2VarMaps(); // Invariant: exactly one of "reader_" and "v2_reader_" is non-null. std::unique_ptr reader_; std::unique_ptr v2_reader_; - std::unique_ptr var_to_shape_map_ptr_; + + std::unique_ptr var_to_shape_map_; + std::unique_ptr var_to_data_type_map_; TF_DISALLOW_COPY_AND_ASSIGN(CheckpointReader); }; diff --git a/tensorflow/core/util/tensor_slice_reader.cc b/tensorflow/core/util/tensor_slice_reader.cc index cd49034719..c6dda2ec29 100644 --- a/tensorflow/core/util/tensor_slice_reader.cc +++ b/tensorflow/core/util/tensor_slice_reader.cc @@ -278,13 +278,24 @@ TensorSliceReader::VarToShapeMap TensorSliceReader::GetVariableToShapeMap() const { VarToShapeMap name_to_shape; if (status().ok()) { - for (auto e : Tensors()) { + for (auto& e : Tensors()) { name_to_shape[e.first] = e.second->shape(); } } return name_to_shape; } +TensorSliceReader::VarToDataTypeMap +TensorSliceReader::GetVariableToDataTypeMap() const { + VarToDataTypeMap name_to_dtype; + if (status().ok()) { + for (auto& e : Tensors()) { + name_to_dtype[e.first] = e.second->type(); + } + } + return name_to_dtype; +} + const string TensorSliceReader::DebugString() const { string shape_str; if (status().ok()) { diff --git a/tensorflow/core/util/tensor_slice_reader.h b/tensorflow/core/util/tensor_slice_reader.h index 5932d59a15..4bb2b24615 100644 --- a/tensorflow/core/util/tensor_slice_reader.h +++ b/tensorflow/core/util/tensor_slice_reader.h @@ -103,9 +103,14 @@ class TensorSliceReader { std::unique_ptr* out_tensor) const; typedef std::unordered_map VarToShapeMap; + typedef std::unordered_map VarToDataTypeMap; + // Returns a map from tensor name to shape. VarToShapeMap GetVariableToShapeMap() const; + // Returns a map from tensor name to data type. 
+ VarToDataTypeMap GetVariableToDataTypeMap() const; + // Returns a string containing names and shapes of all the tensors. const string DebugString() const; diff --git a/tensorflow/python/util/py_checkpoint_reader.i b/tensorflow/python/util/py_checkpoint_reader.i index 1d20f9756f..0cd095d9d9 100644 --- a/tensorflow/python/util/py_checkpoint_reader.i +++ b/tensorflow/python/util/py_checkpoint_reader.i @@ -68,6 +68,38 @@ limitations under the License. $result = output_map.release(); } +%typemap(out) const tensorflow::checkpoint::TensorSliceReader::VarToDataTypeMap& { + tensorflow::Safe_PyObjectPtr output_map(tensorflow::make_safe(PyDict_New())); + for (auto v : *$1) { +%#if PY_MAJOR_VERSION >= 3 + tensorflow::Safe_PyObjectPtr key( + tensorflow::make_safe(PyUnicode_FromStringAndSize(v.first.c_str(), v.first.size()))); +%#else + tensorflow::Safe_PyObjectPtr key( + tensorflow::make_safe(PyString_FromStringAndSize(v.first.c_str(), v.first.size()))); +%#endif + if (!key) { + SWIG_fail; + } +%#if PY_MAJOR_VERSION >= 3 + tensorflow::Safe_PyObjectPtr value(tensorflow::make_safe(PyLong_FromLong(v.second))); +%#else + tensorflow::Safe_PyObjectPtr value(tensorflow::make_safe(PyInt_FromLong(v.second))); +%#endif + if (!value) { + SWIG_fail; + } + if (PyDict_SetItem(output_map.get(), key.get(), value.get()) == -1) { + SWIG_fail; + } else { + key.release(); + value.release(); + } + } + + $result = output_map.release(); +} + %{ static PyObject* CheckpointReader_GetTensor( tensorflow::checkpoint::CheckpointReader* reader, @@ -102,11 +134,17 @@ PyObject* CheckpointReader_GetTensor( %unignore tensorflow::checkpoint::CheckpointReader::~CheckpointReader; %rename("debug_string") tensorflow::checkpoint::CheckpointReader::DebugString; %rename("get_variable_to_shape_map") tensorflow::checkpoint::CheckpointReader::GetVariableToShapeMap; +%rename("_GetVariableToDataTypeMap") tensorflow::checkpoint::CheckpointReader::GetVariableToDataTypeMap; %rename("_HasTensor") 
tensorflow::checkpoint::CheckpointReader::HasTensor; %unignore CheckpointReader_GetTensor; %extend tensorflow::checkpoint::CheckpointReader { %insert("python") %{ + def get_variable_to_dtype_map(self): + from tensorflow.python.framework import dtypes + return {name: dtypes.DType(type_enum) + for name, type_enum in self._GetVariableToDataTypeMap().items()} + def has_tensor(self, tensor_str): from tensorflow.python.util import compat return self._HasTensor(compat.as_bytes(tensor_str)) -- GitLab From 6cf9ffeab4da4ad38bdf2afd803bf44cdc58d15d Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 5 Oct 2017 07:50:09 -0700 Subject: [PATCH 053/909] Removes use of _grad_fn_accepts_none_for_indices in magic_gradient_function. Leaves the one in imperative_grad, which seems to matter. PiperOrigin-RevId: 171152474 --- tensorflow/python/eager/backprop.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 55df6496ed..5e3af16fb2 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -388,12 +388,6 @@ def _magic_gradient_function(op_name, attr_tuple, num_inputs, if grad_fn is None: return [None] * num_inputs - none_indices = _grad_fn_accepts_none_for_indices.get(op_name, []) - out_grads = [ - o if (o is not None or i in none_indices) - else array_ops.zeros_like(outputs[i]) - for i, o in enumerate(out_grads) - ] return grad_fn(mock_op, *out_grads) -- GitLab From 7d9f8ffdcaf48968b137f7e785d04a689436449f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 5 Oct 2017 07:52:36 -0700 Subject: [PATCH 054/909] Make a branch of the KMeans estimator that is ported to the core Estimator API. 
PiperOrigin-RevId: 171152686 --- tensorflow/contrib/cmake/tf_tests.cmake | 1 + tensorflow/contrib/factorization/BUILD | 24 + tensorflow/contrib/factorization/__init__.py | 12 +- .../factorization/python/ops/kmeans.py | 417 +++++++++++++ .../factorization/python/ops/kmeans_test.py | 575 ++++++++++++++++++ 5 files changed, 1024 insertions(+), 5 deletions(-) create mode 100644 tensorflow/contrib/factorization/python/ops/kmeans.py create mode 100644 tensorflow/contrib/factorization/python/ops/kmeans_test.py diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 658d19e493..55d57b7574 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -296,6 +296,7 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py" "${tensorflow_source_dir}/tensorflow/contrib/distributions/python/kernel_tests/vector_student_t_test.py" "${tensorflow_source_dir}/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py" + "${tensorflow_source_dir}/tensorflow/contrib/factorization/python/ops/kmeans_test.py" "${tensorflow_source_dir}/tensorflow/contrib/learn/python/learn/estimators/kmeans_test.py" # Failing with TF 1.3 (TODO) "${tensorflow_source_dir}/tensorflow/contrib/distributions/python/kernel_tests/estimator_test.py" diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index c468c544d3..8a7825c614 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -8,6 +8,7 @@ exports_files(["LICENSE"]) package(default_visibility = ["//tensorflow:__subpackages__"]) +load("//tensorflow:tensorflow.bzl", "py_test") load("//tensorflow:tensorflow.bzl", "tf_custom_op_library") load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") @@ -23,6 +24,7 @@ tf_custom_op_py_library( 
"python/ops/factorization_ops.py", "python/ops/gmm.py", "python/ops/gmm_ops.py", + "python/ops/kmeans.py", "python/ops/wals.py", ], dso = [ @@ -199,6 +201,28 @@ tf_py_test( ) # Estimators tests +py_test( + name = "kmeans_test", + size = "medium", + srcs = ["python/ops/kmeans_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":factorization_py", + ":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:data_flow_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform", + "//tensorflow/python:platform_benchmark", + "//tensorflow/python:random_ops", + "//tensorflow/python:training", + "//third_party/py/numpy", + ], +) + tf_py_test( name = "wals_test", size = "large", diff --git a/tensorflow/contrib/factorization/__init__.py b/tensorflow/contrib/factorization/__init__.py index 486c2ea933..6112c9d830 100644 --- a/tensorflow/contrib/factorization/__init__.py +++ b/tensorflow/contrib/factorization/__init__.py @@ -23,22 +23,24 @@ from tensorflow.contrib.factorization.python.ops.clustering_ops import * from tensorflow.contrib.factorization.python.ops.factorization_ops import * from tensorflow.contrib.factorization.python.ops.gmm import * from tensorflow.contrib.factorization.python.ops.gmm_ops import * +from tensorflow.contrib.factorization.python.ops.kmeans import * from tensorflow.contrib.factorization.python.ops.wals import * # pylint: enable=wildcard-import from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'KMeans', 'COSINE_DISTANCE', - 'KMEANS_PLUS_PLUS_INIT', - 'RANDOM_INIT', - 'SQUARED_EUCLIDEAN_DISTANCE', - 'WALSModel', 'GMM', 'gmm', 'GmmAlgorithm', + 'KMeans', + 'KMEANS_PLUS_PLUS_INIT', + 'KMeansClustering', + 'RANDOM_INIT', + 'SQUARED_EUCLIDEAN_DISTANCE', 'WALSMatrixFactorization', + 'WALSModel', ] 
remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py new file mode 100644 index 0000000000..6284768bdd --- /dev/null +++ b/tensorflow/contrib/factorization/python/ops/kmeans.py @@ -0,0 +1,417 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A canned Estimator for k-means clustering.""" + +# TODO(ccolby): Move clustering_ops.py into this file and streamline the code. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +import numpy as np + +from tensorflow.contrib.factorization.python.ops import clustering_ops +from tensorflow.python.estimator import estimator +from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics +from tensorflow.python.ops import state_ops +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.summary import summary +from tensorflow.python.training import session_run_hook +from tensorflow.python.training import training_util + + +class _LossRelativeChangeHook(session_run_hook.SessionRunHook): + """Stops when the change in loss goes below a tolerance.""" + + def __init__(self, loss_tensor, tolerance): + """Creates a _LossRelativeChangeHook. + + Args: + loss_tensor: A scalar tensor of the loss value. + tolerance: A relative tolerance of loss change between iterations. + """ + self._loss_tensor = loss_tensor + self._tolerance = tolerance + self._prev_loss = None + + def before_run(self, run_context): + del run_context # unused + return session_run_hook.SessionRunArgs(self._loss_tensor) + + def after_run(self, run_context, run_values): + loss = run_values.results + assert loss is not None + if self._prev_loss: + relative_change = (abs(loss - self._prev_loss) / + (1 + abs(self._prev_loss))) + if relative_change < self._tolerance: + run_context.request_stop() + self._prev_loss = loss + + +class _InitializeClustersHook(session_run_hook.SessionRunHook): + """Initializes the cluster centers. + + The chief repeatedly invokes an initialization op until all cluster centers + are initialized. 
The workers wait for the initialization phase to complete. + """ + + def __init__(self, init_op, is_initialized_var, is_chief): + """Creates an _InitializeClustersHook. + + Args: + init_op: An op that, when run, will choose some initial cluster centers. + This op may need to be run multiple times to choose all the centers. + is_initialized_var: A boolean variable reporting whether all initial + centers have been chosen. + is_chief: A boolean specifying whether this task is the chief. + """ + self._init_op = init_op + self._is_initialized_var = is_initialized_var + self._is_chief = is_chief + + def after_create_session(self, session, coord): + del coord # unused + assert self._init_op.graph is ops.get_default_graph() + assert self._is_initialized_var.graph is self._init_op.graph + while True: + try: + if session.run(self._is_initialized_var): + break + elif self._is_chief: + session.run(self._init_op) + else: + time.sleep(1) + except RuntimeError as e: + logging.info(e) + + +def _parse_tensor_or_dict(features): + """Helper function to convert the input points into a usable format. + + Args: + features: The input points. + + Returns: + If `features` is a dict of `k` features, each of which is a vector of `n` + scalars, the return value is a Tensor of shape `(n, k)` representing `n` + input points, where the items in the `k` dimension are sorted + lexicographically by `features` key. If `features` is not a dict, it is + returned unmodified. 
+ """ + if isinstance(features, dict): + keys = sorted(features.keys()) + with ops.colocate_with(features[keys[0]]): + features = array_ops.concat([features[k] for k in keys], axis=1) + return features + + +class _ModelFn(object): + """Model function for the estimator.""" + + def __init__(self, num_clusters, initial_clusters, distance_metric, + random_seed, use_mini_batch, mini_batch_steps_per_iteration, + kmeans_plus_plus_num_retries, relative_tolerance): + self._num_clusters = num_clusters + self._initial_clusters = initial_clusters + self._distance_metric = distance_metric + self._random_seed = random_seed + self._use_mini_batch = use_mini_batch + self._mini_batch_steps_per_iteration = mini_batch_steps_per_iteration + self._kmeans_plus_plus_num_retries = kmeans_plus_plus_num_retries + self._relative_tolerance = relative_tolerance + + def model_fn(self, features, mode, config): + """Model function for the estimator. + + Note that this does not take a `labels` arg. This works, but `input_fn` must + return either `features` or, equivalently, `(features, None)`. + + Args: + features: The input points. See @{tf.estimator.Estimator}. + mode: See @{tf.estimator.Estimator}. + config: See @{tf.estimator.Estimator}. + + Returns: + A @{tf.estimator.EstimatorSpec} (see @{tf.estimator.Estimator}) specifying + this behavior: + * `train_op`: Execute one mini-batch or full-batch run of Lloyd's + algorithm. + * `loss`: The sum of the squared distances from each input point to its + closest center. + * `eval_metric_ops`: Maps `SCORE` to `loss`. + * `predictions`: Maps `ALL_DISTANCES` to the distance from each input + point to each cluster center; maps `CLUSTER_INDEX` to the index of + the closest cluster center for each input point; maps `CLUSTERS` to + the cluster centers (which ignores the input points). + """ + # input_points is a single Tensor. Therefore, the sharding functionality + # in clustering_ops is unused, and some of the values below are lists of a + # single item.
+ input_points = _parse_tensor_or_dict(features) + + # Let N = the number of input_points. + # all_distances: A list of one matrix of shape (N, num_clusters). Each value + # is the distance from an input point to a cluster center. + # model_predictions: A list of one vector of shape (N). Each value is the + # cluster id of an input point. + # losses: Similar to cluster_idx but provides the distance to the cluster + # center. + # is_initialized: scalar indicating whether the initial cluster centers + # have been chosen; see init_op. + # cluster_centers_var: a Variable containing the cluster centers. + # init_op: an op to choose the initial cluster centers. A single worker + # repeatedly executes init_op until is_initialized becomes True. + # training_op: an op that runs an iteration of training, either an entire + # Lloyd iteration or a mini-batch of a Lloyd iteration. Multiple workers + # may execute this op, but only after is_initialized becomes True. + (all_distances, model_predictions, losses, is_initialized, + cluster_centers_var, init_op, training_op) = clustering_ops.KMeans( + inputs=input_points, + num_clusters=self._num_clusters, + initial_clusters=self._initial_clusters, + distance_metric=self._distance_metric, + use_mini_batch=self._use_mini_batch, + mini_batch_steps_per_iteration=self._mini_batch_steps_per_iteration, + random_seed=self._random_seed, + kmeans_plus_plus_num_retries=self._kmeans_plus_plus_num_retries + ).training_graph() + + loss = math_ops.reduce_sum(losses) + summary.scalar('loss/raw', loss) + + incr_step = state_ops.assign_add(training_util.get_global_step(), 1) + training_op = control_flow_ops.with_dependencies([training_op, incr_step], + loss) + + training_hooks = [ + _InitializeClustersHook(init_op, is_initialized, config.is_chief) + ] + if self._relative_tolerance is not None: + training_hooks.append( + _LossRelativeChangeHook(loss, self._relative_tolerance)) + + return model_fn_lib.EstimatorSpec( + mode=mode, + predictions={ + 
KMeansClustering.ALL_DISTANCES: all_distances[0], + KMeansClustering.CLUSTER_INDEX: model_predictions[0], + KMeansClustering.CLUSTERS: cluster_centers_var.value(), + }, + loss=loss, + train_op=training_op, + eval_metric_ops={KMeansClustering.SCORE: metrics.mean(loss)}, + training_hooks=training_hooks) + + +# TODO(agarwal,ands): support sharded input. +class KMeansClustering(estimator.Estimator): + """An Estimator for K-Means clustering.""" + + # Valid values for the distance_metric constructor argument. + SQUARED_EUCLIDEAN_DISTANCE = clustering_ops.SQUARED_EUCLIDEAN_DISTANCE + COSINE_DISTANCE = clustering_ops.COSINE_DISTANCE + + # Values for initial_clusters constructor argument. + RANDOM_INIT = clustering_ops.RANDOM_INIT + KMEANS_PLUS_PLUS_INIT = clustering_ops.KMEANS_PLUS_PLUS_INIT + + # Metric returned by evaluate(): The sum of the squared distances from each + # input point to its closest center. + SCORE = 'score' + + # Keys returned by predict(). + # ALL_DISTANCES: The distance from each input point to each cluster center. + # CLUSTER_INDEX: The index of the closest cluster center for each input point. + # CLUSTERS: The cluster centers (which ignores the input points). + CLUSTER_INDEX = 'cluster_index' + CLUSTERS = 'clusters' + ALL_DISTANCES = 'all_distances' + + def __init__(self, + num_clusters, + model_dir=None, + initial_clusters=RANDOM_INIT, + distance_metric=SQUARED_EUCLIDEAN_DISTANCE, + random_seed=0, + use_mini_batch=True, + mini_batch_steps_per_iteration=1, + kmeans_plus_plus_num_retries=2, + relative_tolerance=None, + config=None): + """Creates an Estimator for running KMeans training and inference. + + This Estimator implements the following variants of the K-means algorithm: + + If `use_mini_batch` is False, it runs standard full batch K-means. Each + training step runs a single iteration of K-Means and must process the full + input at once. To run in this mode, the `input_fn` passed to `train` must + return the entire input dataset. 
+ + If `use_mini_batch` is True, it runs a generalization of the mini-batch + K-means algorithm. It runs multiple iterations, where each iteration is + composed of `mini_batch_steps_per_iteration` steps. Each training step + accumulates the contribution from one mini-batch into temporary storage. + Every `mini_batch_steps_per_iteration` steps, the cluster centers are + updated and the temporary storage cleared for the next iteration. Note + that: + * If `mini_batch_steps_per_iteration=1`, the algorithm reduces to the + standard K-means mini-batch algorithm. + * If `mini_batch_steps_per_iteration = num_inputs / batch_size`, the + algorithm becomes an asynchronous version of the full-batch algorithm. + However, there is no guarantee by this implementation that each input + is seen exactly once per iteration. Also, different updates are applied + asynchronously without locking. So this asynchronous version may not + behave exactly like a full-batch version. + + Args: + num_clusters: An integer tensor specifying the number of clusters. This + argument is ignored if `initial_clusters` is a tensor or numpy array. + model_dir: The directory to save the model results and log files. + initial_clusters: Specifies how the initial cluster centers are chosen. + One of the following: + * a tensor or numpy array with the initial cluster centers. + * a callable `f(inputs, k)` that selects and returns up to `k` centers + from an input batch. `f` is free to return any number of centers + from `0` to `k`. It will be invoked on successive input batches + as necessary until all `num_clusters` centers are chosen. + * `KMeansClustering.RANDOM_INIT`: Choose centers randomly from an input + batch. If the batch size is less than `num_clusters` then the + entire batch is chosen to be initial cluster centers and the + remaining centers are chosen from successive input batches. + * `KMeansClustering.KMEANS_PLUS_PLUS_INIT`: Use kmeans++ to choose + centers from the first input batch. 
If the batch size is less + than `num_clusters`, a TensorFlow runtime error occurs. + distance_metric: The distance metric used for clustering. One of: + * `KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE`: Squared Euclidean + distance between vectors `u` and `v` is defined as `||u - v||_2^2`, + which is the sum of the absolute squares of the elements' + difference. + * `KMeansClustering.COSINE_DISTANCE`: Cosine distance between vectors + `u` and `v` is defined as `1 - (u . v) / (||u||_2 ||v||_2)`. + random_seed: Python integer. Seed for PRNG used to initialize centers. + use_mini_batch: A boolean specifying whether to use the mini-batch k-means + algorithm. See explanation above. + mini_batch_steps_per_iteration: The number of steps after which the + updated cluster centers are synced back to a master copy. Used only if + `use_mini_batch=True`. See explanation above. + kmeans_plus_plus_num_retries: For each point that is sampled during + kmeans++ initialization, this parameter specifies the number of + additional points to draw from the current distribution before selecting + the best. If a negative value is specified, a heuristic is used to + sample `O(log(num_to_sample))` additional points. Used only if + `initial_clusters=KMeansClustering.KMEANS_PLUS_PLUS_INIT`. + relative_tolerance: A relative tolerance of change in the loss between + iterations. Stops learning if the loss changes less than this amount. + This may not work correctly if `use_mini_batch=True`. + config: See @{tf.estimator.Estimator}. + + Raises: + ValueError: An invalid argument was passed to `initial_clusters` or + `distance_metric`.
+ """ + if isinstance(initial_clusters, str) and initial_clusters not in [ + KMeansClustering.RANDOM_INIT, KMeansClustering.KMEANS_PLUS_PLUS_INIT + ]: + raise ValueError( + "Unsupported initialization algorithm '%s'" % initial_clusters) + if distance_metric not in [ + KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE, + KMeansClustering.COSINE_DISTANCE + ]: + raise ValueError("Unsupported distance metric '%s'" % distance_metric) + super(KMeansClustering, self).__init__( + model_fn=_ModelFn( + num_clusters, initial_clusters, distance_metric, random_seed, + use_mini_batch, mini_batch_steps_per_iteration, + kmeans_plus_plus_num_retries, relative_tolerance).model_fn, + model_dir=model_dir, + config=config) + + def _predict_one_key(self, input_fn, predict_key): + for result in self.predict(input_fn=input_fn, predict_keys=[predict_key]): + yield result[predict_key] + + def predict_cluster_index(self, input_fn): + """Finds the index of the closest cluster center to each input point. + + Args: + input_fn: Input points. See @{tf.estimator.Estimator.predict}. + + Yields: + The index of the closest cluster center for each input point. + """ + for index in self._predict_one_key(input_fn, + KMeansClustering.CLUSTER_INDEX): + yield index + + def score(self, input_fn): + """Returns the sum of squared distances to nearest clusters. + + Note that this function is different from the corresponding one in sklearn + which returns the negative sum. + + Args: + input_fn: Input points. See @{tf.estimator.Estimator.evaluate}. Only one + batch is retrieved. + + Returns: + The sum of the squared distance from each point in the first batch of + inputs to its nearest cluster center. + """ + return self.evaluate(input_fn=input_fn, steps=1)[KMeansClustering.SCORE] + + def transform(self, input_fn): + """Transforms each input point to its distances to all cluster centers. 
+ + Note that if `distance_metric=KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE`, + this + function returns the squared Euclidean distance while the corresponding + sklearn function returns the Euclidean distance. + + Args: + input_fn: Input points. See @{tf.estimator.Estimator.predict}. + + Yields: + The distances from each input point to each cluster center. + """ + for distances in self._predict_one_key(input_fn, + KMeansClustering.ALL_DISTANCES): + yield distances + + def cluster_centers(self): + """Returns the cluster centers.""" + + # TODO(ccolby): Fix this clunky code once cl/168262087 is submitted. + # Discussion: go/estimator-get-variable-value + class RunOnceHook(session_run_hook.SessionRunHook): + """Stops after a single run.""" + + def after_run(self, run_context, run_values): + del run_values # unused + run_context.request_stop() + + result = self.predict( + input_fn=lambda: (constant_op.constant([], shape=[0, 1]), None), + predict_keys=[KMeansClustering.CLUSTERS], + hooks=[RunOnceHook()]) + return np.array([r[KMeansClustering.CLUSTERS] for r in result]) diff --git a/tensorflow/contrib/factorization/python/ops/kmeans_test.py b/tensorflow/contrib/factorization/python/ops/kmeans_test.py new file mode 100644 index 0000000000..4709d79425 --- /dev/null +++ b/tensorflow/contrib/factorization/python/ops/kmeans_test.py @@ -0,0 +1,575 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for KMeans.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import time + +import numpy as np +from sklearn.cluster import KMeans as SklearnKMeans + +# pylint: disable=g-import-not-at-top +from tensorflow.contrib.factorization.python.ops import kmeans as kmeans_lib +from tensorflow.python.estimator import run_config +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.platform import benchmark +from tensorflow.python.platform import flags +from tensorflow.python.platform import test +from tensorflow.python.training import input as input_lib +from tensorflow.python.training import queue_runner + +FLAGS = flags.FLAGS + + +def normalize(x): + return x / np.sqrt(np.sum(x * x, axis=-1, keepdims=True)) + + +def cosine_similarity(x, y): + return np.dot(normalize(x), np.transpose(normalize(y))) + + +def make_random_centers(num_centers, num_dims, center_norm=500): + return np.round( + np.random.rand(num_centers, num_dims).astype(np.float32) * center_norm) + + +def make_random_points(centers, num_points, max_offset=20): + num_centers, num_dims = centers.shape + assignments = np.random.choice(num_centers, num_points) + offsets = np.round( + np.random.randn(num_points, num_dims).astype(np.float32) * max_offset) + return (centers[assignments] + offsets, assignments, np.add.reduce( + offsets * offsets, 1)) + + +class KMeansTestBase(test.TestCase): + + def input_fn(self, + batch_size=None, + points=None, + randomize=None, + num_epochs=None): + """Returns an 
input_fn that randomly selects batches from given points.""" + batch_size = batch_size or self.batch_size + points = points if points is not None else self.points + num_points = points.shape[0] + if randomize is None: + randomize = (self.use_mini_batch and + self.mini_batch_steps_per_iteration <= 1) + + def _fn(): + x = constant_op.constant(points) + if batch_size == num_points: + return input_lib.limit_epochs(x, num_epochs=num_epochs), None + if randomize: + indices = random_ops.random_uniform( + constant_op.constant([batch_size]), + minval=0, + maxval=num_points - 1, + dtype=dtypes.int32, + seed=10) + else: + # We need to cycle through the indices sequentially. We create a queue + # to maintain the list of indices. + q = data_flow_ops.FIFOQueue(num_points, dtypes.int32, ()) + + # Conditionally initialize the Queue. + def _init_q(): + with ops.control_dependencies( + [q.enqueue_many(math_ops.range(num_points))]): + return control_flow_ops.no_op() + + init_q = control_flow_ops.cond(q.size() <= 0, _init_q, + control_flow_ops.no_op) + with ops.control_dependencies([init_q]): + offsets = q.dequeue_many(batch_size) + with ops.control_dependencies([q.enqueue_many(offsets)]): + indices = array_ops.identity(offsets) + batch = array_ops.gather(x, indices) + return (input_lib.limit_epochs(batch, num_epochs=num_epochs), None) + + return _fn + + @staticmethod + def config(tf_random_seed): + return run_config.RunConfig().replace(tf_random_seed=tf_random_seed) + + @property + def initial_clusters(self): + return kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT + + @property + def batch_size(self): + return self.num_points + + @property + def use_mini_batch(self): + return False + + @property + def mini_batch_steps_per_iteration(self): + return 1 + + +class KMeansTest(KMeansTestBase): + + def setUp(self): + np.random.seed(3) + self.num_centers = 5 + self.num_dims = 2 + self.num_points = 1000 + self.true_centers = make_random_centers(self.num_centers, self.num_dims) + 
self.points, _, self.scores = make_random_points(self.true_centers, + self.num_points) + self.true_score = np.add.reduce(self.scores) + + def _kmeans(self, relative_tolerance=None): + return kmeans_lib.KMeansClustering( + self.num_centers, + initial_clusters=self.initial_clusters, + distance_metric=kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE, + use_mini_batch=self.use_mini_batch, + mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration, + random_seed=24, + relative_tolerance=relative_tolerance) + + def test_clusters(self): + kmeans = self._kmeans() + kmeans.train(input_fn=self.input_fn(), steps=1) + clusters = kmeans.cluster_centers() + self.assertAllEqual(list(clusters.shape), [self.num_centers, self.num_dims]) + + def test_fit(self): + kmeans = self._kmeans() + kmeans.train(input_fn=self.input_fn(), steps=1) + score1 = kmeans.score(input_fn=self.input_fn(batch_size=self.num_points)) + steps = 10 * self.num_points // self.batch_size + kmeans.train(input_fn=self.input_fn(), steps=steps) + score2 = kmeans.score(input_fn=self.input_fn(batch_size=self.num_points)) + self.assertTrue(score1 > score2) + self.assertNear(self.true_score, score2, self.true_score * 0.05) + + def test_monitor(self): + if self.use_mini_batch: + # We don't test for use_mini_batch case since the loss value can be noisy. + return + kmeans = kmeans_lib.KMeansClustering( + self.num_centers, + initial_clusters=self.initial_clusters, + distance_metric=kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE, + use_mini_batch=self.use_mini_batch, + mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration, + config=self.config(14), + random_seed=12, + relative_tolerance=1e-4) + + kmeans.train( + input_fn=self.input_fn(), + # Force it to train until the relative tolerance monitor stops it. 
+ steps=None) + score = kmeans.score(input_fn=self.input_fn(batch_size=self.num_points)) + self.assertNear(self.true_score, score, self.true_score * 0.01) + + def test_infer(self): + kmeans = self._kmeans() + # Make a call to fit to initialize the cluster centers. + max_steps = 1 + kmeans.train(input_fn=self.input_fn(), max_steps=max_steps) + clusters = kmeans.cluster_centers() + + # Make a small test set + num_points = 10 + points, true_assignments, true_offsets = make_random_points( + clusters, num_points) + input_fn = self.input_fn(batch_size=num_points, points=points, num_epochs=1) + # Test predict + assignments = list(kmeans.predict_cluster_index(input_fn)) + self.assertAllEqual(assignments, true_assignments) + + # Test score + score = kmeans.score(input_fn=lambda: (constant_op.constant(points), None)) + self.assertNear(score, np.sum(true_offsets), 0.01 * score) + + # Test transform + transform = list(kmeans.transform(input_fn)) + true_transform = np.maximum( + 0, + np.sum(np.square(points), axis=1, keepdims=True) - + 2 * np.dot(points, np.transpose(clusters)) + np.transpose( + np.sum(np.square(clusters), axis=1, keepdims=True))) + self.assertAllClose(transform, true_transform, rtol=0.05, atol=10) + + +class KMeansTestMultiStageInit(KMeansTestBase): + + def test_random(self): + points = np.array( + [[1, 2], [3, 4], [5, 6], [7, 8], [9, 0]], dtype=np.float32) + kmeans = kmeans_lib.KMeansClustering( + num_clusters=points.shape[0], + initial_clusters=kmeans_lib.KMeansClustering.RANDOM_INIT, + distance_metric=kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE, + use_mini_batch=True, + mini_batch_steps_per_iteration=100, + random_seed=24, + relative_tolerance=None) + kmeans.train( + input_fn=self.input_fn(batch_size=1, points=points, randomize=False), + steps=1) + clusters = kmeans.cluster_centers() + self.assertAllEqual(points, clusters) + + def test_kmeans_plus_plus_batch_just_right(self): + points = np.array([[1, 2]], dtype=np.float32) + kmeans = 
kmeans_lib.KMeansClustering( + num_clusters=points.shape[0], + initial_clusters=kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT, + distance_metric=kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE, + use_mini_batch=True, + mini_batch_steps_per_iteration=100, + random_seed=24, + relative_tolerance=None) + kmeans.train( + input_fn=self.input_fn(batch_size=1, points=points, randomize=False), + steps=1) + clusters = kmeans.cluster_centers() + self.assertAllEqual(points, clusters) + + def test_kmeans_plus_plus_batch_too_small(self): + points = np.array( + [[1, 2], [3, 4], [5, 6], [7, 8], [9, 0]], dtype=np.float32) + kmeans = kmeans_lib.KMeansClustering( + num_clusters=points.shape[0], + initial_clusters=kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT, + distance_metric=kmeans_lib.KMeansClustering.SQUARED_EUCLIDEAN_DISTANCE, + use_mini_batch=True, + mini_batch_steps_per_iteration=100, + random_seed=24, + relative_tolerance=None) + with self.assertRaisesOpError(AssertionError): + kmeans.train( + input_fn=self.input_fn(batch_size=4, points=points, randomize=False), + steps=1) + + +class MiniBatchKMeansTest(KMeansTest): + + @property + def batch_size(self): + return 50 + + @property + def use_mini_batch(self): + return True + + +class FullBatchAsyncKMeansTest(KMeansTest): + + @property + def batch_size(self): + return 50 + + @property + def use_mini_batch(self): + return True + + @property + def mini_batch_steps_per_iteration(self): + return self.num_points // self.batch_size + + +class KMeansCosineDistanceTest(KMeansTestBase): + + def setUp(self): + self.points = np.array( + [[2.5, 0.1], [2, 0.2], [3, 0.1], [4, 0.2], [0.1, 2.5], [0.2, 2], + [0.1, 3], [0.2, 4]], + dtype=np.float32) + self.num_points = self.points.shape[0] + self.true_centers = np.array( + [ + normalize( + np.mean(normalize(self.points)[0:4, :], axis=0, + keepdims=True))[0], + normalize( + np.mean(normalize(self.points)[4:, :], axis=0, + keepdims=True))[0] + ], + dtype=np.float32) + 
self.true_assignments = np.array([0] * 4 + [1] * 4) + self.true_score = len(self.points) - np.tensordot( + normalize(self.points), self.true_centers[self.true_assignments]) + + self.num_centers = 2 + self.kmeans = kmeans_lib.KMeansClustering( + self.num_centers, + initial_clusters=kmeans_lib.KMeansClustering.RANDOM_INIT, + distance_metric=kmeans_lib.KMeansClustering.COSINE_DISTANCE, + use_mini_batch=self.use_mini_batch, + mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration, + config=self.config(3)) + + def test_fit(self): + max_steps = 10 * self.num_points // self.batch_size + self.kmeans.train(input_fn=self.input_fn(), max_steps=max_steps) + centers = normalize(self.kmeans.cluster_centers()) + centers = centers[centers[:, 0].argsort()] + true_centers = self.true_centers[self.true_centers[:, 0].argsort()] + self.assertAllClose(centers, true_centers, atol=0.04) + + def test_transform(self): + self.kmeans.train(input_fn=self.input_fn(), steps=10) + centers = normalize(self.kmeans.cluster_centers()) + true_transform = 1 - cosine_similarity(self.points, centers) + transform = list( + self.kmeans.transform( + input_fn=self.input_fn(batch_size=self.num_points, num_epochs=1))) + self.assertAllClose(transform, true_transform, atol=1e-3) + + def test_predict(self): + max_steps = 10 * self.num_points // self.batch_size + self.kmeans.train(input_fn=self.input_fn(), max_steps=max_steps) + centers = normalize(self.kmeans.cluster_centers()) + + assignments = list( + self.kmeans.predict_cluster_index( + input_fn=self.input_fn(num_epochs=1, batch_size=self.num_points))) + self.assertAllClose( + centers[assignments], + self.true_centers[self.true_assignments], + atol=1e-2) + + centers = centers[centers[:, 0].argsort()] + true_centers = self.true_centers[self.true_centers[:, 0].argsort()] + self.assertAllClose(centers, true_centers, atol=0.04) + score = self.kmeans.score( + input_fn=self.input_fn(batch_size=self.num_points)) + self.assertAllClose(score, 
self.true_score, atol=1e-2) + + def test_predict_kmeans_plus_plus(self): + # Most points are concentrated near one center. KMeans++ is likely to find + # the less populated centers. + points = np.array( + [[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3], [-3.1, -3.2], + [-2.8, -3.], [-2.9, -3.1], [-3., -3.1], [-3., -3.1], [-3.2, -3.], + [-3., -3.]], + dtype=np.float32) + true_centers = np.array( + [ + normalize( + np.mean(normalize(points)[0:2, :], axis=0, keepdims=True))[0], + normalize( + np.mean(normalize(points)[2:4, :], axis=0, keepdims=True))[0], + normalize(np.mean(normalize(points)[4:, :], axis=0, + keepdims=True))[0] + ], + dtype=np.float32) + true_assignments = [0] * 2 + [1] * 2 + [2] * 8 + true_score = len(points) - np.tensordot( + normalize(points), true_centers[true_assignments]) + + kmeans = kmeans_lib.KMeansClustering( + 3, + initial_clusters=self.initial_clusters, + distance_metric=kmeans_lib.KMeansClustering.COSINE_DISTANCE, + use_mini_batch=self.use_mini_batch, + mini_batch_steps_per_iteration=self.mini_batch_steps_per_iteration, + config=self.config(3)) + kmeans.train( + input_fn=lambda: (constant_op.constant(points), None), steps=30) + + centers = normalize(kmeans.cluster_centers()) + self.assertAllClose( + sorted(centers.tolist()), sorted(true_centers.tolist()), atol=1e-2) + + def _input_fn(): + return (input_lib.limit_epochs( + constant_op.constant(points), num_epochs=1), None) + + assignments = list(kmeans.predict_cluster_index(input_fn=_input_fn)) + self.assertAllClose( + centers[assignments], true_centers[true_assignments], atol=1e-2) + + score = kmeans.score(input_fn=lambda: (constant_op.constant(points), None)) + self.assertAllClose(score, true_score, atol=1e-2) + + +class MiniBatchKMeansCosineTest(KMeansCosineDistanceTest): + + @property + def batch_size(self): + return 2 + + @property + def use_mini_batch(self): + return True + + +class FullBatchAsyncKMeansCosineTest(KMeansCosineDistanceTest): + + @property + def batch_size(self): +
return 2 + + @property + def use_mini_batch(self): + return True + + @property + def mini_batch_steps_per_iteration(self): + return self.num_points // self.batch_size + + +class KMeansBenchmark(benchmark.Benchmark): + """Base class for benchmarks.""" + + def SetUp(self, + dimension=50, + num_clusters=50, + points_per_cluster=10000, + center_norm=500, + cluster_width=20): + np.random.seed(123456) + self.num_clusters = num_clusters + self.num_points = num_clusters * points_per_cluster + self.centers = make_random_centers( + self.num_clusters, dimension, center_norm=center_norm) + self.points, _, scores = make_random_points( + self.centers, self.num_points, max_offset=cluster_width) + self.score = float(np.sum(scores)) + + def _report(self, num_iters, start, end, scores): + print(scores) + self.report_benchmark( + iters=num_iters, + wall_time=(end - start) / num_iters, + extras={'true_sum_squared_distances': self.score, + 'fit_scores': scores}) + + def _fit(self, num_iters=10): + pass + + def benchmark_01_2dim_5center_500point(self): + self.SetUp(dimension=2, num_clusters=5, points_per_cluster=100) + self._fit() + + def benchmark_02_20dim_20center_10kpoint(self): + self.SetUp(dimension=20, num_clusters=20, points_per_cluster=500) + self._fit() + + def benchmark_03_100dim_50center_50kpoint(self): + self.SetUp(dimension=100, num_clusters=50, points_per_cluster=1000) + self._fit() + + def benchmark_03_100dim_50center_50kpoint_unseparated(self): + self.SetUp( + dimension=100, + num_clusters=50, + points_per_cluster=1000, + cluster_width=250) + self._fit() + + def benchmark_04_100dim_500center_500kpoint(self): + self.SetUp(dimension=100, num_clusters=500, points_per_cluster=1000) + self._fit(num_iters=4) + + def benchmark_05_100dim_500center_500kpoint_unseparated(self): + self.SetUp( + dimension=100, + num_clusters=500, + points_per_cluster=1000, + cluster_width=250) + self._fit(num_iters=4) + + +class TensorflowKMeansBenchmark(KMeansBenchmark): + + def _fit(self, 
num_iters=10): + scores = [] + start = time.time() + for i in range(num_iters): + print('Starting tensorflow KMeans: %d' % i) + tf_kmeans = kmeans_lib.KMeansClustering( + self.num_clusters, + initial_clusters=kmeans_lib.KMeansClustering.KMEANS_PLUS_PLUS_INIT, + kmeans_plus_plus_num_retries=int(math.log(self.num_clusters) + 2), + random_seed=i * 42, + relative_tolerance=1e-6, + config=self.config(3)) + tf_kmeans.train( + input_fn=lambda: (constant_op.constant(self.points), None), steps=50) + _ = tf_kmeans.cluster_centers() + scores.append( + tf_kmeans.score( + input_fn=lambda: (constant_op.constant(self.points), None))) + self._report(num_iters, start, time.time(), scores) + + +class SklearnKMeansBenchmark(KMeansBenchmark): + + def _fit(self, num_iters=10): + scores = [] + start = time.time() + for i in range(num_iters): + print('Starting sklearn KMeans: %d' % i) + sklearn_kmeans = SklearnKMeans( + n_clusters=self.num_clusters, + init='k-means++', + max_iter=50, + n_init=1, + tol=1e-4, + random_state=i * 42) + sklearn_kmeans.train(self.points) + scores.append(sklearn_kmeans.inertia_) + self._report(num_iters, start, time.time(), scores) + + +class KMeansTestQueues(test.TestCase): + + def input_fn(self): + + def _fn(): + queue = data_flow_ops.FIFOQueue( + capacity=10, dtypes=dtypes.float32, shapes=[10, 3]) + enqueue_op = queue.enqueue(array_ops.zeros([10, 3], dtype=dtypes.float32)) + queue_runner.add_queue_runner( + queue_runner.QueueRunner(queue, [enqueue_op])) + return queue.dequeue(), None + + return _fn + + # This test makes sure that there are no deadlocks when using a QueueRunner. + # Note that since cluster initialization is dependent on inputs, if input + # is generated using a QueueRunner, one has to make sure that these runners + # are started before the initialization.
+ def test_queues(self): + kmeans = kmeans_lib.KMeansClustering(5) + kmeans.train(input_fn=self.input_fn(), steps=1) + + +if __name__ == '__main__': + test.main() -- GitLab From 37d297d00a0639c53bf7366afd7d4836c2e09fcf Mon Sep 17 00:00:00 2001 From: David Norman Date: Thu, 5 Oct 2017 16:46:27 +0100 Subject: [PATCH 055/909] Re-instate the plugin BUILD (#13291) * Re-instate the plugin BUILD * Adding a README to describe the purpose of this directory --- tensorflow/compiler/jit/BUILD | 1 + tensorflow/compiler/plugin/BUILD | 42 ++++++++++++++++++++++++++++ tensorflow/compiler/plugin/README.md | 16 +++++++++++ 3 files changed, 59 insertions(+) create mode 100644 tensorflow/compiler/plugin/BUILD create mode 100644 tensorflow/compiler/plugin/README.md diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index bf63b7e501..bf7d9cf14d 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -33,6 +33,7 @@ cc_library( deps = [ ":xla_cpu_device", ":xla_cpu_jit", + "//tensorflow/compiler/plugin", ] + if_cuda_is_configured([ ":xla_gpu_device", ":xla_gpu_jit", diff --git a/tensorflow/compiler/plugin/BUILD b/tensorflow/compiler/plugin/BUILD new file mode 100644 index 0000000000..f088672154 --- /dev/null +++ b/tensorflow/compiler/plugin/BUILD @@ -0,0 +1,42 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Configuration file for an XLA plugin. + + please don't check in changes to this file. to prevent changes appearing + in git status, use: + + git update-index --assume-unchanged tensorflow/compiler/plugin/BUILD + + To add additional devices to the XLA subsystem, add targets to the + dependency list in the 'plugin' target. For instance: + + deps = ["//tensorflow/compiler/plugin/example:plugin_lib"], + + ** Please don't remove this file - it is supporting some 3rd party plugins ** +""" + +licenses(["notice"]) + +package( + default_visibility = ["//visibility:public"], +) + +cc_library( + name = "plugin", + deps = [ + #"//tensorflow/compiler/plugin/example:example_lib", + ], +) diff --git a/tensorflow/compiler/plugin/README.md b/tensorflow/compiler/plugin/README.md new file mode 100644 index 0000000000..9dd0d2bdab --- /dev/null +++ b/tensorflow/compiler/plugin/README.md @@ -0,0 +1,16 @@ +3rd party XLA devices +--------------------- + +This directory is intended as a place for 3rd party XLA devices which are _not_ +integrated into the public repository. + +By adding entries to the BUILD target in this directory, a third party device +can be included as a dependency of the JIT subsystem. 
+ +For integration into the unit test system, see the files: + +- tensorflow/compiler/tests/plugin.bzl +- tensorflow/compiler/xla/tests/plugin.bzl + + +- -- GitLab From ae98ba9ac2e9889ea38c45539296ab8efe432933 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 5 Oct 2017 08:51:37 -0700 Subject: [PATCH 056/909] imperative_gradient doesn't fail if some variables are not connected to the output PiperOrigin-RevId: 171158798 --- tensorflow/python/eager/backprop.py | 14 ++++---------- tensorflow/python/eager/backprop_test.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 5e3af16fb2..1d729cc2e1 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -99,7 +99,7 @@ def _prepare_backprop(target, tensor_to_op, op_to_entry, id_sources): o_to_e = {} # Copy of just the bits we need from op_to_entry while tensor_stack: t = tensor_stack.pop() - op = tensor_to_op[t] + op = tensor_to_op.get(t, None) # op is None if the tensor is a source (i.e. was watched directly) if op is None or op in o_to_e: continue @@ -313,15 +313,9 @@ def imperative_grad( for i, s in enumerate(sources): g = gradients.get(ops.tensor_id(s), None) if g is None: - # TODO(apassos): figure out a way to summarize why sources and targets are - # not connected. - raise ValueError("There is no sequence of operations connecting source " - "tensor %s (%s) to any of the target Tensors. This is " - "commonly caused by the tape not recording all " - "operations in the forward pass or if by mistake a " - "source was only used in non-differentiable operations." 
- % (i, s)) - result.append(_aggregate_grads(g)) + result.append(None) + else: + result.append(_aggregate_grads(g)) return result _op_attr_type_cache = {} diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 07d2d2a148..3b72974fc7 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -255,6 +255,16 @@ class BackpropTest(test.TestCase): self.assertAllEqual(dx.numpy(), y.numpy()) self.assertAllEqual(dy.numpy(), x.numpy()) + def testUnconnectedNone(self): + v = resource_variable_ops.ResourceVariable( + 1.0, name='testUnconnectedNone') + + def f(): + v.read_value() + return constant_op.constant(1.0) + + self.assertEqual(backprop.implicit_grad(f)()[0][0], None) + def testEmptyParamsForValueAndGradFunction(self): def fn(a, b): return a * b -- GitLab From 8dc5e3718b85b72a8bc6e5a2ea8270eecfdf99a1 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 5 Oct 2017 09:41:13 -0700 Subject: [PATCH 057/909] [TFXLA] Functionalize tf.cond. Convert tf.cond to functional form output = cond ? then_branch(inputs) : else_branch(inputs) where then_branch and else_branch are functions. 
PiperOrigin-RevId: 171164597 --- tensorflow/compiler/tf2xla/BUILD | 4 + .../tf2xla/functionalize_control_flow.cc | 813 +++++++++++++++++- .../tf2xla/functionalize_control_flow.h | 1 - .../tf2xla/functionalize_control_flow_test.cc | 129 +++ .../compiler/tf2xla/ops/functional_ops.cc | 39 +- tensorflow/python/ops/control_flow_ops.py | 6 + 6 files changed, 949 insertions(+), 43 deletions(-) diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 08f2249e0d..4da2ed722e 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -253,6 +253,7 @@ tf_cc_test( "//tensorflow/core:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core:testlib", ], ) @@ -347,6 +348,7 @@ cc_library( hdrs = ["functionalize_control_flow.h"], deps = [ "//tensorflow/compiler/jit:graph_to_functiondef", + "//tensorflow/compiler/jit:union_find", "//tensorflow/compiler/tf2xla:dump_graph", "//tensorflow/compiler/tf2xla/ops:functional_ops", "//tensorflow/compiler/xla:status_macros", @@ -354,6 +356,7 @@ cc_library( "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", + "//tensorflow/core:lib", ], ) @@ -371,6 +374,7 @@ tf_cc_test( "//tensorflow/compiler/tf2xla/cc:functional_ops", "//tensorflow/compiler/xla:status_macros", "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", "//tensorflow/core:ops", diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc index 1c7a2046aa..56d8bb4f2c 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc @@ -17,15 +17,19 @@ limitations under the License. 
#include #include +#include #include #include #include "tensorflow/compiler/jit/graph_to_functiondef.h" +#include "tensorflow/compiler/jit/union_find.h" #include "tensorflow/compiler/tf2xla/dump_graph.h" #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/graph/control_flow.h" +#include "tensorflow/core/lib/gtl/optional.h" namespace tensorflow { @@ -74,7 +78,8 @@ struct Frame { // starting at nodes in vector `stack`. // `node_map` is a vector indexed by source node ID to dest nodes. // Does not traverse into nodes in `node_map`, so by adding nodes to `node_map` -// before the traversal clients can cut the graph. Returns an error if the +// before the traversal clients can cut the graph. If a frame is provided (frame +// != nullptr), then this functions will return an error if the // traversal leaves 'frame'; the client must add enough nodes to `node_map` to // cut the graph and prevent the traversal from escaping. // @@ -84,7 +89,7 @@ struct Frame { // taking from the Switch node was not necessarily the first output, but _Arg // nodes only have one output. By adding the Switch node to `squash_src_outputs` // we rewrite the src_output of the corresponding edge to be 0. -Status CopySubgraph(const Graph& graph, const Frame& frame, +Status CopySubgraph(const Graph& graph, const Frame* frame, std::vector stack, const std::vector& squash_src_outputs, std::vector* node_map, Graph* output) { @@ -100,9 +105,9 @@ Status CopySubgraph(const Graph& graph, const Frame& frame, for (const Edge* e : n->in_edges()) { Node* src = e->src(); - if (frame.nodes.find(src) == frame.nodes.end()) { + if (frame != nullptr && frame->nodes.find(src) == frame->nodes.end()) { // We traversed out of the loop frame, without encountering a cut node. 
- return errors::Internal("Graph traversal of loop frame ", frame.name, + return errors::Internal("Graph traversal of loop frame ", frame->name, " escaped frame at ", src->name(), " without encountering an argument node."); } @@ -119,27 +124,31 @@ Status CopySubgraph(const Graph& graph, const Frame& frame, return Status::OK(); } -Status BuildArgNode(Graph* graph, DataType type, int index, Node** arg_node) { +xla::StatusOr AddNode(const NodeDef& node_def, Graph* graph) { + Status status; + Node* inserted_node = graph->AddNode(node_def, &status); + if (!status.ok()) { + return status; + } + return inserted_node; +} + +xla::StatusOr BuildArgNode(Graph* graph, DataType type, int index) { NodeDef arg_def; - NodeDefBuilder builder(strings::StrCat("_Arg", index), kArgOp); + NodeDefBuilder builder(strings::StrCat(kArgOp, index), kArgOp); builder.Attr("T", type); builder.Attr("index", index); TF_RETURN_IF_ERROR(builder.Finalize(&arg_def)); - Status status; - *arg_node = graph->AddNode(arg_def, &status); - return status; + return AddNode(arg_def, graph); } -Status BuildRetvalNode(Graph* graph, DataType type, int index, - Node** retval_node) { +xla::StatusOr BuildRetvalNode(Graph* graph, DataType type, int index) { NodeDef ret_def; ret_def.set_op(kRetValOp); - ret_def.set_name(strings::StrCat("_Retval", index)); + ret_def.set_name(strings::StrCat(kRetValOp, index)); AddNodeAttr("T", type, &ret_def); AddNodeAttr("index", index, &ret_def); - Status status; - *retval_node = graph->AddNode(ret_def, &status); - return status; + return AddNode(ret_def, graph); } // Builds a graph for the loop condition. 
@@ -157,9 +166,8 @@ Status BuildLoopCondition(const Graph& graph, Frame* frame, for (int i = 0; i < frame->args.size(); ++i) { const Arg& arg = frame->args[i]; - Node* arg_node; - TF_RETURN_IF_ERROR( - BuildArgNode(output, arg.enter->input_type(0), i, &arg_node)); + TF_ASSIGN_OR_RETURN(Node * arg_node, + BuildArgNode(output, arg.enter->input_type(0), i)); if (arg.is_loop_invariant) { node_map[arg.enter->id()] = arg_node; } else { @@ -169,16 +177,14 @@ Status BuildLoopCondition(const Graph& graph, Frame* frame, // Build a Retval node for the loop condition. The LoopCond nodes are always // boolean because of the type constraints on the LoopCond op. - TF_RETURN_IF_ERROR( - BuildRetvalNode(output, DT_BOOL, 0, &node_map[frame->loop_cond->id()])); + TF_ASSIGN_OR_RETURN(node_map[frame->loop_cond->id()], + BuildRetvalNode(output, DT_BOOL, 0)); // Performs a reverse DFS, copying nodes and edges to the output graph. // The _Arg and _Retval nodes were added unconditionally above, so we are // guaranteed to get the correct function signature. - TF_RETURN_IF_ERROR(CopySubgraph(graph, *frame, {frame->loop_cond}, - squash_src_outputs, &node_map, output)); - - return Status::OK(); + return CopySubgraph(graph, frame, {frame->loop_cond}, squash_src_outputs, + &node_map, output); } // Builds a graph for the loop body. 
@@ -202,8 +208,8 @@ Status BuildLoopBody(const Graph& graph, Frame* frame, DataType dtype = arg.enter->input_type(0); arg_types->push_back(dtype); - Node* arg_node; - TF_RETURN_IF_ERROR(BuildArgNode(output, dtype, i, &arg_node)); + + TF_ASSIGN_OR_RETURN(Node * arg_node, BuildArgNode(output, dtype, i)); if (dtype == DT_RESOURCE) { // The convention of the XLA bridge is that resource variable arguments @@ -213,8 +219,8 @@ Status BuildLoopBody(const Graph& graph, Frame* frame, TF_RET_CHECK(arg.is_loop_invariant); node_map[arg.enter->id()] = arg_node; } else { - Node* retval_node; - TF_RETURN_IF_ERROR(BuildRetvalNode(output, dtype, i, &retval_node)); + TF_ASSIGN_OR_RETURN(Node * retval_node, + BuildRetvalNode(output, dtype, i)); if (arg.is_loop_invariant) { // Argument is loop-invariant. Forward it from the Arg to the Retval. @@ -237,7 +243,7 @@ Status BuildLoopBody(const Graph& graph, Frame* frame, // Performs a reverse DFS, copying nodes and edges to the output graph. // The _Arg and _Retval nodes were added unconditionally above, so we are // guaranteed to get the correct function signature. - TF_RETURN_IF_ERROR(CopySubgraph(graph, *frame, std::move(next_iterations), + TF_RETURN_IF_ERROR(CopySubgraph(graph, frame, std::move(next_iterations), squash_src_outputs, &node_map, output)); return Status::OK(); @@ -450,12 +456,7 @@ Status FunctionalizeLoop(Graph* graph, Frame* frame, } builder.Input(inputs); TF_RETURN_IF_ERROR(builder.Finalize(&while_def)); - - Status status; - Node* while_node = graph->AddNode(while_def, &status); - if (!status.ok()) { - return status; - } + TF_ASSIGN_OR_RETURN(Node * while_node, AddNode(while_def, graph)); // Copies edges to the Enter nodes and from the Exit nodes onto the While. 
for (int i = 0; i < frame->args.size(); ++i) { @@ -488,6 +489,7 @@ Status FunctionalizeLoop(Graph* graph, Frame* frame, for (Node* node : frame->nodes) { graph->RemoveNode(node); } + frame->nodes.clear(); frame->parent->nodes.insert(while_node); VLOG(2) << "Frame " << frame->name << " after: " @@ -496,6 +498,742 @@ Status FunctionalizeLoop(Graph* graph, Frame* frame, return Status::OK(); } +class FunctionalizeCond { + public: + // Identifies the connected parts of the tf.Cond. + struct ClusterHandle { + explicit ClusterHandle(int representative = -1) + : representative(representative) {} + + bool operator==(const ClusterHandle& other) const { + return representative == other.representative; + } + + bool operator!=(const ClusterHandle& other) const { + return !(*this == other); + } + + bool operator<(const ClusterHandle& other) const { + return representative < other.representative; + } + + bool operator>(const ClusterHandle& other) const { + return representative > other.representative; + } + + string ToString() const { + return strings::StrCat("Cluster_", representative); + } + + // Vector of UnionFind indexable by ClusterHandle and Node*. + struct Vector { + explicit Vector(size_t size) : clusters(size) {} + + UnionFind& at(const ClusterHandle& cluster) { + return clusters.at(cluster.representative); + } + + UnionFind& at(const Node* node) { + return clusters.at(node->id()); + } + + UnionFind& operator[](const Node* node) { + return clusters.at(node->id()); + } + + size_t size() const { return clusters.size(); } + + void resize(size_t count) { return clusters.resize(count); } + + private: + std::vector> clusters; + }; + + private: + int representative; + }; + + // Represents a node in the clustered graph consisting of switch_nodes, + // merge_nodes as well as the edges into and out of this node to other + // Clusters. Each Cluster corresponds to a ClusterHandle and has a + // corresponding representative. 
+ struct Cluster { + std::unordered_set switch_nodes; + std::unordered_set merge_nodes; + std::unordered_set in_nodes; + std::unordered_set out_nodes; + + // A member of the ClusterHandle corresponding to this Cluster. + ClusterHandle representative; + bool visited = false; + }; + + // Represent the clustered graph as map from cluster representative to + // Cluster. + using ClusteredGraph = std::map; + + // The arguments and condition of a XlaIf. The arguments are ordered by node + // id in the original graph. + struct CondArgs { + struct CondCmp { + bool operator()(const Node* a, const Node* b) { + return a->id() < b->id(); + } + }; + Node* conditional = nullptr; + std::set args; + }; + + static Status Functionalize(Graph* graph, FunctionLibraryDefinition* library); + + private: + FunctionalizeCond(Graph* graph, FunctionLibraryDefinition* library) + : clusters_(graph->num_node_ids()), library_(library), graph_(graph) {} + + // Returns a vector of Merge nodes from the clustered graph where the nodes + // are sorted by the number of switch nodes minus number of merge nodes + // from a root of the clustered graph to the given Merge node, with ties + // broken by the representative of the Cluster. + std::vector> SortedMergeNodes(); + + // Returns whether the graph has no conditionals. + bool NoConditionals() const { return merge_nodes_.empty(); } + + // Construct the clustered graph by creating nodes for each cluster and the + // connections between the clusters. Switch and Merge nodes partition + // clusters, so iterate over those. Note: a Cluster may have neither a + // Merge or Switch but will have an in/out edge from a Cluster that has. + void CreateClusters(); + + // Creates the clustered graph by identifying all the edges between different + // clusters and collecting all switch and merge nodes that correspond to a + // cluster. 
+ void CreateClusteredGraph(); + + // If `from` and `to` correspond to different clusters, then merge the nodes + // in the clustered graph corresponding to `from` and `to`. + void ContractEdge(Cluster* from, Cluster* to); + + // Converts a Merge node to a XlaIf. This encapsulates the process of + // extracting the bodies needed for the then and else branch, creates a XlaIf + // node, removing the nodes of the branches from the graph and replacing the + // merge node with a XlaIf. + Status ConvertMergeToXlaIf(Cluster* merge_cluster); + + // Returns the switch cluster corresponding to the merge node. This function + // only returns the switch cluster in the simple case where we have a switch + // node is the entry of a diamond corresponding to a conditional: + // + // Switch + // / \ + // Branch Branch + // \ / + // merge_cluster + gtl::optional GetSwitchCluster(const Cluster& merge_cluster); + + // Determines the arguments needed as input to the Merge cluster originating + // from the Switch cluster. + xla::StatusOr DetermineCondArgs(const Cluster& merge_cluster, + const Cluster& switch_cluster); + + // Builds a XlaIfOp to replace the Merge node with. + xla::StatusOr BuildAndAddXlaIfOp(const CondArgs& cond_args, + const Cluster& merge_cluster, + const std::vector& outputs); + + // Extracts a function body corresponding to the given input edge of the merge + // node. + Status ExtractBody(const CondArgs& cond_args, const Cluster& merge_cluster, + const std::vector& outputs, int input_edge, + Graph* body); + + // Adds all the input edges to `if_node` corresponding to the arguments. + Status AddInputEdges(const CondArgs& cond_args, Node* if_node); + + // Adds all output edges from the `if_node`. + Status AddOutputEdges(const std::vector& outputs, Node* if_node); + + // Removes all nodes from the graph that are part of cluster. + void RemoveClusterNodes(Cluster* cluster); + + // Removes all argument nodes that are unused. 
+ template + void RemoveUnusedArgs(const T& args); + + // Removes all Merge nodes that are unused. + void RemoveUnusedMergeNodes(Cluster* merge_cluster); + + // Returns the representative member of the corresponding cluster. + ClusterHandle Representative(const Node* node) { + return clusters_.at(node).Get(); + } + + ClusteredGraph clustered_graph_; + ClusterHandle::Vector clusters_; + std::unordered_set merge_nodes_; + std::unordered_set switch_nodes_; + FunctionLibraryDefinition* library_; + Graph* graph_; +}; + +std::ostream& operator<<(std::ostream& os, + const FunctionalizeCond::ClusterHandle& c) { + os << c.ToString(); + return os; +} + +// Returns a dot representation of the clustered graph showing the connections +// between the nodes and the nodes in each cluster. +string DebugString(const Graph& graph, + FunctionalizeCond::ClusterHandle::Vector* clusters) { + string ret = "digraph {\ncompound=true;labeljust=\"r\";\n"; + std::map subgraphs; + for (Node* n : graph.nodes()) { + if (n->IsOp()) { + strings::StrAppend(&subgraphs[clusters->at(n).Get()], n->id(), + " [label=\"", n->name(), "\"];\n"); + } + } + for (auto kv : subgraphs) { + strings::StrAppend(&ret, "subgraph cluster_", kv.first.ToString(), " {\n", + "label = \"", kv.first.ToString(), "\";\n", kv.second, + "}\n"); + } + for (Node* n : graph.nodes()) { + if (!n->IsOp()) { + continue; + } + for (Node* in : n->in_nodes()) { + if (in->IsOp()) { + strings::StrAppend(&ret, in->id(), " -> ", n->id(), ";\n"); + } + } + } + return strings::StrCat(ret, "}"); +} + +bool IsDeadSwitch(const Node* node) { + for (const Edge* e : node->out_edges()) { + const Node* dst = e->dst(); + if (!dst->IsIdentity()) { + return false; + } + for (const Edge* ee : dst->out_edges()) { + if (!ee->IsControlEdge() || !ee->dst()->IsSink()) { + return false; + } + } + } + return true; +} + +void FunctionalizeCond::CreateClusters() { + for (Node* node : graph_->nodes()) { + if (!node->IsOp()) { + continue; + } + if (IsSwitch(node)) { 
+ switch_nodes_.insert(node); + } else if (IsMerge(node)) { + merge_nodes_.insert(node); + } + ClusterHandle& cluster = clusters_.at(node).Get(); + cluster = ClusterHandle(node->id()); + } + + // If there are no Merge nodes, then terminate. + if (merge_nodes_.empty()) { + return; + } + + // Remove all dead Switch nodes. + RemoveUnusedArgs(switch_nodes_); + + // All parent_'s are still nullptr so clusters_ may still be resized. Resize + // conservatively assuming all merge nodes become XlaIf nodes. + clusters_.resize(clusters_.size() + merge_nodes_.size()); + + // Merge a cluster with its input, unless the input is a Switch node or the + // node is a Merge node. + for (const Node* node : graph_->nodes()) { + if (IsMerge(node) || !node->IsOp()) { + continue; + } + for (const Node* in : node->in_nodes()) { + if (!IsSwitch(in) && in->IsOp()) { + clusters_.at(node).Merge(&clusters_.at(in)); + } + } + } +} + +void FunctionalizeCond::ContractEdge(Cluster* from, Cluster* to) { + VLOG(3) << "ContractEdge from = " << from->representative + << " to = " << to->representative; + if (from->representative == to->representative) { + return; + } + to->merge_nodes.insert(from->merge_nodes.begin(), from->merge_nodes.end()); + from->merge_nodes.clear(); + to->switch_nodes.insert(from->switch_nodes.begin(), from->switch_nodes.end()); + from->switch_nodes.clear(); + + for (Cluster* from_out : from->out_nodes) { + from_out->in_nodes.erase(from); + if (from_out->representative != to->representative) { + from_out->in_nodes.insert(to); + to->out_nodes.insert(from_out); + } + } + from->out_nodes.clear(); + + for (Cluster* from_in : from->in_nodes) { + from_in->out_nodes.erase(from); + if (from_in->representative != to->representative) { + from_in->out_nodes.insert(to); + to->in_nodes.insert(from_in); + } + } + from->in_nodes.clear(); + + to->in_nodes.erase(from); + to->out_nodes.erase(from); + clusters_.at(to->representative).Merge(&clusters_.at(from->representative)); + from->visited = 
true; +} + +void FunctionalizeCond::CreateClusteredGraph() { + auto update_cluster_for_node = [this](Node* node) -> Cluster& { + ClusterHandle repr = Representative(node); + Cluster& cluster_node = clustered_graph_[repr]; + cluster_node.representative = repr; + for (const Node* in : node->in_nodes()) { + ClusterHandle other_repr = Representative(in); + // Skip source, sink and internal edges. + if (!in->IsOp() || other_repr == repr) { + continue; + } + Cluster& cluster_node_in = clustered_graph_[other_repr]; + cluster_node.in_nodes.insert(&cluster_node_in); + cluster_node_in.out_nodes.insert(&cluster_node); + cluster_node_in.representative = other_repr; + } + for (const Node* out : node->out_nodes()) { + ClusterHandle other_repr = Representative(out); + // Skip source, sink and internal edges. + if (!out->IsOp() || other_repr == repr) { + continue; + } + Cluster& cluster_node_out = clustered_graph_[other_repr]; + cluster_node.out_nodes.insert(&cluster_node_out); + cluster_node_out.in_nodes.insert(&cluster_node); + cluster_node_out.representative = other_repr; + } + return cluster_node; + }; + for (Node* node : switch_nodes_) { + update_cluster_for_node(node).switch_nodes.insert(node); + } + for (Node* node : merge_nodes_) { + update_cluster_for_node(node).merge_nodes.insert(node); + } + + // Merge Merge nodes with common input together. 
+ for (Node* node : merge_nodes_) { + Cluster& cluster = clustered_graph_.at(Representative(node)); + for (const Node* in : node->in_nodes()) { + if (!in->IsOp()) { + continue; + } + Cluster& cluster_node_in = clustered_graph_.at(Representative(in)); + for (auto it = cluster_node_in.out_nodes.begin(); + it != cluster_node_in.out_nodes.end();) { + ContractEdge(*it++, &cluster); + } + } + } + + VLOG(3) << "ClusteredGraph: " << DebugString(*graph_, &clusters_); +} + +gtl::optional FunctionalizeCond::GetSwitchCluster( + const Cluster& merge_cluster) { + VLOG(3) << "GetSwitchCluster for " << merge_cluster.representative; + gtl::optional switch_cluster; + if (merge_cluster.in_nodes.size() != 2) { + return gtl::nullopt; + } + for (const Cluster* in : merge_cluster.in_nodes) { + if (in->in_nodes.size() != 1) { + return gtl::nullopt; + } + for (auto inin : in->in_nodes) { + if (switch_cluster.has_value()) { + if (*switch_cluster != inin) { + return gtl::nullopt; + } + } else { + switch_cluster = inin; + } + } + } + return switch_cluster; +} + +xla::StatusOr FunctionalizeCond::DetermineCondArgs( + const Cluster& merge_cluster, const Cluster& switch_cluster) { + VLOG(2) << "DetermineCondArgs for " << merge_cluster.representative + << " with switch cluster " << switch_cluster.representative; + CondArgs ret; + auto feeds_into_branch_cluster = [&](Node* switch_cluster) { + for (Node* out : switch_cluster->out_nodes()) { + ClusterHandle repr = Representative(out); + for (Cluster* in : merge_cluster.in_nodes) { + if (repr == in->representative) { + return true; + } + } + } + return false; + }; + for (Node* switch_cluster_node : switch_cluster.switch_nodes) { + if (!feeds_into_branch_cluster(switch_cluster_node)) { + continue; + } + + Node* tmp; + TF_RETURN_IF_ERROR(switch_cluster_node->input_node(1, &tmp)); + if (ret.conditional == nullptr) { + ret.conditional = tmp; + } else if (ret.conditional != tmp) { + return errors::Unimplemented( + "Switch statements with different 
conditionals cannot be " + "converted into functional conditional."); + } + ret.args.insert(switch_cluster_node); + } + return ret; +} + +xla::StatusOr FunctionalizeCond::BuildAndAddXlaIfOp( + const CondArgs& cond_args, const Cluster& merge_cluster, + const std::vector& outputs) { + VLOG(2) << "Build if op for {" + << str_util::Join(merge_cluster.merge_nodes, ", ", + [](string* out, const Node* node) { + strings::StrAppend(out, node->name()); + }) + << "}"; + NodeDef if_def; + // Create a new If node using the name of the merge node. + NodeDefBuilder builder( + strings::StrCat((*merge_cluster.merge_nodes.begin())->name(), "_If"), + "XlaIf"); + string branch[] = {"else_branch", "then_branch"}; + for (int i = 0; i < 2; ++i) { + static std::atomic sequence_num(0LL); + int64 id = ++sequence_num; + + NameAttrList body_name; + body_name.set_name( + strings::StrCat("_functionalize_if_", branch[i], "_", id)); + auto body = xla::MakeUnique(graph_->op_registry()); + TF_RETURN_IF_ERROR( + ExtractBody(cond_args, merge_cluster, outputs, i, body.get())); + FunctionDef body_fdef; + TF_RETURN_IF_ERROR(GraphToFunctionDef(*body, body_name.name(), &body_fdef)); + TF_RETURN_IF_ERROR(library_->AddFunctionDef(body_fdef)); + builder.Attr(branch[i], body_name); + } + + // Build input type. + std::vector inputs; + DataTypeVector in_arg_types; + for (const Node* arg : cond_args.args) { + const Edge* in_edge; + TF_RETURN_IF_ERROR(arg->input_edge(0, &in_edge)); + if (in_edge->IsControlEdge()) { + builder.ControlInput(in_edge->src()->name()); + } else { + DataType dtype = arg->input_type(0); + inputs.emplace_back(NodeDefBuilder::NodeOut( + in_edge->src()->name(), in_edge->src_output(), dtype)); + in_arg_types.push_back(dtype); + } + } + builder.Attr("Tin", in_arg_types); + + // Build output type. 
+ DataTypeVector out_type; + for (const Node* merge : merge_cluster.merge_nodes) { + DataType dtype = merge->output_type(0); + out_type.push_back(dtype); + } + builder.Attr("Tout", out_type); + + builder.Attr("Tcond", DT_BOOL); + builder.Device(cond_args.conditional->assigned_device_name()); + // Conditional should be the first input ... + builder.Input(NodeDefBuilder::NodeOut(cond_args.conditional->name(), 0, + cond_args.conditional->output_type(0))); + // ... followed by the other inputs. + builder.Input(inputs); + + TF_RETURN_IF_ERROR(builder.Finalize(&if_def)); + TF_ASSIGN_OR_RETURN(Node * if_node, AddNode(if_def, graph_)); + return if_node; +} + +void FunctionalizeCond::RemoveClusterNodes(Cluster* cluster) { + VLOG(3) << "RemoveClusterNodes for " << cluster->representative; + ClusterHandle repr = cluster->representative; + std::deque to_delete; + for (Node* node : graph_->nodes()) { + if (Representative(node) == repr) { + to_delete.push_back(node); + } + } + for (Node* n : to_delete) { + graph_->RemoveNode(n); + } +} + +template +void FunctionalizeCond::RemoveUnusedArgs(const T& args) { + VLOG(2) << "RemoveUnusedArgs among: " + << str_util::Join(args, ", ", [](string* output, const Node* node) { + strings::StrAppend(output, node->name()); + }); + + std::deque to_delete; + for (Node* arg : args) { + if (IsDeadSwitch(arg)) { + to_delete.push_back(arg); + for (Node* n : arg->out_nodes()) { + to_delete.push_back(n); + } + } + } + for (Node* n : to_delete) { + switch_nodes_.erase(n); + auto it = clustered_graph_.find(Representative(n)); + if (it != clustered_graph_.end()) { + it->second.switch_nodes.erase(n); + } + graph_->RemoveNode(n); + } +} + +Status FunctionalizeCond::ExtractBody(const CondArgs& cond_args, + const Cluster& merge_cluster, + const std::vector& outputs, + int input_edge, Graph* body) { + VLOG(2) << "ExtractBody for " << merge_cluster.representative; + std::vector squash_src_outputs(graph_->num_node_ids(), false); + std::vector 
node_map(graph_->num_node_ids(), nullptr); + int arg_count = 0; + for (const auto* arg : cond_args.args) { + DataType dtype = arg->input_type(0); + TF_ASSIGN_OR_RETURN(Node * arg_node, + BuildArgNode(body, dtype, arg_count++)); + if (dtype == DT_RESOURCE) { + bool constant; + TF_RETURN_IF_ERROR(GetNodeAttr(arg->attrs(), "is_constant", &constant)); + TF_RET_CHECK(constant); + } + node_map.at(arg->id()) = arg_node; + squash_src_outputs.at(arg->id()) = true; + } + + std::vector stack; + stack.reserve(outputs.size()); + for (int j = 0; j < outputs.size(); ++j) { + Node* node = outputs[j]; + TF_ASSIGN_OR_RETURN(node_map.at(node->id()), + BuildRetvalNode(body, node->output_type(0), + /*index=*/j)); + Node* in; + TF_RETURN_IF_ERROR(node->input_node(input_edge, &in)); + if (node_map.at(in->id()) == nullptr) { + node_map.at(in->id()) = body->CopyNode(in); + } + body->AddEdge(node_map.at(in->id()), j, node_map.at(node->id()), 0); + stack.push_back(in); + } + + return CopySubgraph(*graph_, nullptr, stack, squash_src_outputs, &node_map, + body); +} + +Status FunctionalizeCond::AddInputEdges(const CondArgs& cond_args, + Node* if_node) { + VLOG(3) << "AddInputEdges for " << if_node->name(); + int i = 0; + graph_->AddEdge(cond_args.conditional, 0, if_node, i++); + for (const Node* arg : cond_args.args) { + const Edge* in_edge; + TF_RETURN_IF_ERROR(arg->input_edge(0, &in_edge)); + if (in_edge->IsControlEdge()) { + graph_->AddControlEdge(in_edge->src(), if_node); + } else { + graph_->AddEdge(in_edge->src(), in_edge->src_output(), if_node, i++); + } + } + return Status::OK(); +} + +Status FunctionalizeCond::AddOutputEdges(const std::vector& outputs, + Node* if_node) { + VLOG(3) << "AddOutputEdges for " << if_node->name(); + for (int i = 0; i < outputs.size(); ++i) { + Node* node = outputs[i]; + std::vector edges(node->out_edges().begin(), + node->out_edges().end()); + for (const Edge* edge : edges) { + Node* dst = edge->dst(); + int dst_input = edge->dst_input(); + + if 
(edge->src_output() > 0) { + return errors::Unimplemented("Output of index (", edge->src_output(), + ") of merge node ", node->name()); + } + graph_->RemoveEdge(edge); + + int src_output = + dst_input == Graph::kControlSlot ? Graph::kControlSlot : i; + graph_->AddEdge(if_node, src_output, dst, dst_input); + } + } + return Status::OK(); +} + +void FunctionalizeCond::RemoveUnusedMergeNodes(Cluster* merge_cluster) { + VLOG(3) << "RemoveUnusedMergeNodes for " << merge_cluster->representative; + // Remove all merge nodes now dead post extraction of If. + for (auto it = merge_cluster->merge_nodes.begin(); + it != merge_cluster->merge_nodes.end();) { + Node* node = *it; + if (node->out_edges().empty()) { + graph_->RemoveNode(node); + merge_cluster->merge_nodes.erase(*it++); + } + } +} + +Status FunctionalizeCond::ConvertMergeToXlaIf(Cluster* merge_cluster) { + VLOG(1) << "ConvertMergeToXlaIf for " << merge_cluster->representative; + gtl::optional switch_cluster = GetSwitchCluster(*merge_cluster); + if (!switch_cluster.has_value()) { + return errors::FailedPrecondition( + "Merge cluster was not part of a simple conditional in the clustered " + "graph. Graph nodes in merge cluster {", + str_util::Join(merge_cluster->merge_nodes, ", ", + [](string* output, Node* node) { + strings::StrAppend(output, node->name()); + }), + "}"); + } + TF_ASSIGN_OR_RETURN(auto cond_args, + DetermineCondArgs(*merge_cluster, **switch_cluster)); + + // Sort the outputs by ID to produce more stable output. + std::vector outputs(merge_cluster->merge_nodes.begin(), + merge_cluster->merge_nodes.end()); + std::sort( + outputs.begin(), outputs.end(), + [](const Node* lhs, const Node* rhs) { return lhs->id() < rhs->id(); }); + + // Extract bodies and builds a If operator. 
+ TF_ASSIGN_OR_RETURN(Node * if_node, + BuildAndAddXlaIfOp(cond_args, *merge_cluster, outputs)); + TF_RETURN_IF_ERROR(AddInputEdges(cond_args, if_node)); + TF_RETURN_IF_ERROR(AddOutputEdges(outputs, if_node)); + + // Remove the old nodes from the graph_ and contract the edges of the + // clustered graph. + for (auto in : merge_cluster->in_nodes) { + RemoveClusterNodes(in); + } + RemoveUnusedArgs(cond_args.args); + auto in_nodes = merge_cluster->in_nodes; + for (auto it = in_nodes.begin(); it != in_nodes.end();) { + ContractEdge(*it++, merge_cluster); + } + ContractEdge(*switch_cluster, merge_cluster); + RemoveUnusedMergeNodes(merge_cluster); + clusters_[if_node].Get() = ClusterHandle(merge_cluster->representative); + + return Status::OK(); +} + +std::vector> +FunctionalizeCond::SortedMergeNodes() { + VLOG(2) << "ProcessClusteredGraph"; + std::stack> stack; + for (auto& c : clustered_graph_) { + if (c.second.in_nodes.empty()) { + stack.push({0, &c.second}); + } + } + + // Perform a depth-first traversal of the clustered graph computing the + // switch-merge depth. + std::vector> queue; + std::unordered_set visited; + while (!stack.empty()) { + Cluster* n = stack.top().second; + size_t depth = stack.top().first; + stack.pop(); + + auto inserted = visited.insert(n); + if (!inserted.second) { + continue; + } + + size_t new_depth = depth; + if (!n->merge_nodes.empty()) { + queue.emplace_back(depth, n); + --new_depth; + } + if (!n->switch_nodes.empty()) { + ++new_depth; + } + for (Cluster* e : n->out_nodes) { + stack.emplace(new_depth, e); + } + } + + // Sort in reverse order of switch-merge depth with ties broken by the + // ClusterHandle. 
+ std::sort(queue.begin(), queue.end(), + [](const std::pair& lhs, + const std::pair& rhs) { + return std::tie(lhs.first, lhs.second->representative) > + std::tie(rhs.first, rhs.second->representative); + }); + + return queue; +} + +Status FunctionalizeCond::Functionalize(Graph* graph, + FunctionLibraryDefinition* library) { + VLOG(1) << "FunctionalizeCond::Functionalize"; + FunctionalizeCond fc(graph, library); + fc.CreateClusters(); + if (fc.NoConditionals()) { + return Status::OK(); + } + fc.CreateClusteredGraph(); + + auto queue = fc.SortedMergeNodes(); + for (auto it = queue.begin(); it != queue.end();) { + Cluster* merge_cluster = (*it).second; + ++it; + TF_RETURN_IF_ERROR(fc.ConvertMergeToXlaIf(merge_cluster)); + } + return Status::OK(); +} + } // namespace // Transformation that converts Tensorflow's graph control flow constructs into @@ -577,7 +1315,10 @@ Status FunctionalizeControlFlow(Graph* graph, } } - return Status::OK(); + // FunctionalizeControlFlow is invoked for every function, so the loops's + // bodies and conditionals that were extracted into functions will be handled + // in successive invocations. + return FunctionalizeCond::Functionalize(graph, library); } } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.h b/tensorflow/compiler/tf2xla/functionalize_control_flow.h index 1535dc80b0..4d4ee3054c 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.h +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.h @@ -23,7 +23,6 @@ namespace tensorflow { // Transformation that converts tf.while_loop() loops into functional While // operators, suitable for XLA compilation. -// TODO(b/36470387): add support for conditionals. 
Status FunctionalizeControlFlow(Graph* graph, FunctionLibraryDefinition* library); diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc index 914c8999a6..8f155ca85e 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/cc/ops/functional_ops.h" #include "tensorflow/compiler/tf2xla/test_util.h" #include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/op.h" @@ -35,6 +36,134 @@ limitations under the License. namespace tensorflow { namespace { +// Returns the names of the "then" and "else" functions for the XlaIf node in a +// graph. +Status FindIfThenAndElse(const GraphDef& graph, NameAttrList* then_fn, + NameAttrList* else_fn) { + for (const NodeDef& node : graph.node()) { + if (node.op() == "XlaIf") { + const NameAttrList* result; + TF_RETURN_IF_ERROR(GetNodeAttr(node, "then_branch", &result)); + *then_fn = *result; + TF_RETURN_IF_ERROR(GetNodeAttr(node, "else_branch", &result)); + *else_fn = *result; + return Status::OK(); + } + } + return errors::NotFound("No XlaIf node found in graph"); +} + +// Graph: +// x = array_ops.placeholder(dtypes.int32) +// y = array_ops.placeholder(dtypes.int32) +// z = control_flow_ops.cond( +// math_ops.less(y, x), lambda: math_ops.multiply(y, 17), +// lambda: math_ops.add(x, 23)) +TEST(FunctionalizeControlFlow, Conditional) { + Graph graph(OpRegistry::Global()); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + + auto x = ops::Placeholder(scope.WithOpName("x"), DT_INT32); + auto y = ops::Placeholder(scope.WithOpName("y"), DT_INT32); + auto less = ops::Less(scope.WithOpName("cond/Less"), 
y, x); + auto switch_1 = ops::Switch(scope.WithOpName("cond/Switch"), less, less); + + auto identity_t = + ops::Identity(scope.WithOpName("cond/Identity"), switch_1.output_true); + auto seventeen = ops::Const( + scope.WithOpName("cond").WithControlDependencies(identity_t), 17); + auto switch_2 = ops::Switch(scope.WithOpName("cond/Switch"), y, less); + auto mul = ops::Multiply(scope.WithOpName("cond/Mul"), switch_2.output_true, + seventeen); + + auto identity_f = + ops::Identity(scope.WithOpName("cond/Identity"), switch_1.output_false); + auto twenty_three = ops::Const( + scope.WithOpName("cond").WithControlDependencies(identity_f), 23); + auto switch_3 = ops::Switch(scope.WithOpName("cond/Switch"), x, less); + auto add = ops::Add(scope.WithOpName("cond/false/add"), + switch_3.output_false, twenty_three); + + auto merge = ops::Merge(scope.WithOpName("cond/Merge"), + std::initializer_list{add, mul}); + + TF_EXPECT_OK(scope.ToGraph(&graph)); + } + + FunctionLibraryDefinition library(OpRegistry::Global(), {}); + TF_ASSERT_OK(FunctionalizeControlFlow(&graph, &library)); + + GraphDef graph_def; + graph.ToGraphDef(&graph_def); + NameAttrList then_fn; + NameAttrList else_fn; + TF_EXPECT_OK(FindIfThenAndElse(graph_def, &then_fn, &else_fn)); + InstantiationResultForTest else_result; + TF_EXPECT_OK( + InstantiateFunctionForTest(else_fn.name(), library, &else_result)); + + // Outer graph + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto y = ops::Placeholder(scope.WithOpName("y"), DT_INT32); + auto x = ops::Placeholder(scope.WithOpName("x"), DT_INT32); + auto less = ops::Less(scope.WithOpName("cond/Less"), y, x); + auto if_op = ops::XlaIf(scope.WithOpName("cond/Merge_If"), less, + std::initializer_list{x, y, less}, then_fn, + else_fn, {DT_INT32}); + GraphDef expected; + TF_EXPECT_OK(scope.ToGraphDef(&expected)); + TF_EXPECT_GRAPH_EQ(expected, graph_def); + } + + // then body. 
+ { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto arg_0 = ops::_Arg(scope.WithOpName("_arg0"), DT_INT32, 0); + auto arg_1 = ops::_Arg(scope.WithOpName("_arg1"), DT_INT32, 1); + auto arg_2 = ops::_Arg(scope.WithOpName("_arg2"), DT_BOOL, 2); + auto identity = ops::Identity(scope.WithOpName("cond/Identity"), arg_2); + auto cond = ops::Const( + scope.WithOpName("cond").WithControlDependencies(identity), 17); + auto mul = ops::Mul(scope.WithOpName("cond/Mul"), arg_1, cond); + auto retval0 = ops::_Retval(scope.WithOpName("_retval0_RetVal"), mul, 0); + + GraphDef expected; + TF_EXPECT_OK(scope.ToGraphDef(&expected)); + + InstantiationResultForTest result; + TF_EXPECT_OK(InstantiateFunctionForTest(then_fn.name(), library, &result)); + + EXPECT_EQ(DataTypeVector{DT_INT32}, result.ret_types); + EXPECT_EQ((DataTypeVector{DT_INT32, DT_INT32, DT_BOOL}), result.arg_types); + TF_EXPECT_GRAPH_EQ(expected, result.gdef); + } + + // else body. + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto arg_0 = ops::_Arg(scope.WithOpName("_arg0"), DT_INT32, 0); + auto arg_1 = ops::_Arg(scope.WithOpName("_arg1"), DT_INT32, 1); + auto arg_2 = ops::_Arg(scope.WithOpName("_arg2"), DT_BOOL, 2); + auto identity = ops::Identity(scope.WithOpName("cond/Identity_1"), arg_2); + auto cond_1 = ops::Const( + scope.WithOpName("cond_1").WithControlDependencies(identity), 23); + auto add = ops::Add(scope.WithOpName("cond/false/add"), arg_0, cond_1); + auto retval0 = ops::_Retval(scope.WithOpName("_retval0_RetVal"), add, 0); + + GraphDef expected; + TF_EXPECT_OK(scope.ToGraphDef(&expected)); + + InstantiationResultForTest result; + TF_EXPECT_OK(InstantiateFunctionForTest(else_fn.name(), library, &result)); + + EXPECT_EQ(DataTypeVector{DT_INT32}, result.ret_types); + EXPECT_EQ((DataTypeVector{DT_INT32, DT_INT32, DT_BOOL}), result.arg_types); + TF_EXPECT_GRAPH_EQ(expected, result.gdef); + } +} + // Returns the names of the "cond" and "body" functions for the While node // in a graph. 
Status FindWhileCondAndBody(const GraphDef& graph, NameAttrList* cond, diff --git a/tensorflow/compiler/tf2xla/ops/functional_ops.cc b/tensorflow/compiler/tf2xla/ops/functional_ops.cc index c1005405f9..4a669f8e6e 100644 --- a/tensorflow/compiler/tf2xla/ops/functional_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/functional_ops.cc @@ -34,14 +34,41 @@ output = input; While (Cond(output)) { output = Body(output) } input: A list of input tensors whose types are T. output: A list of output tensors whose types are T. cond: A function takes 'input' and returns a tensor. If the tensor is - a scalar of non-boolean, the scalar is converted to a boolean - according to the following rule: if the scalar is a numerical - value, non-zero means True and zero means False; if the scalar is - a string, non-empty means True and empty means False. If the - tensor is not a scalar, non-emptiness means True and False - otherwise. + a scalar of non-boolean, the scalar is converted to a boolean + according to the following rule: if the scalar is a numerical + value, non-zero means True and zero means False; if the scalar is + a string, non-empty means True and empty means False. If the + tensor is not a scalar, non-emptiness means True and False + otherwise. body: A function that takes a list of tensors and returns another list of tensors. Both lists have the same types as specified by T. )doc"); +// TODO(b/37549631) setting the If Op to always be stateful is too +// conservative. +REGISTER_OP("XlaIf") + .Input("cond: Tcond") + .Input("inputs: Tin") + .Output("output: Tout") + .Attr("Tcond: type") + .Attr("then_branch: func") + .Attr("else_branch: func") + .Attr("Tin: list(type) >= 0") + .Attr("Tout: list(type) >= 0") + .SetIsStateful() + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +output = cond ? then_branch(inputs) : else_branch(inputs). + +cond: A boolean scalar. +inputs: A list of input tensors. 
+output: A list of tensors returned by either then_branch(inputs) or + else_branch(inputs). The input shapes of the then_branch and + else_branch must match. +then_branch: A function takes 'inputs' and returns a list of tensors, + whose types are the same as what else_branch returns. +else_branch: A function takes 'inputs' and returns a list of tensors. + whose types are the same as what then_branch returns. +)doc"); + } // namespace tensorflow diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index b341eab7ce..29aac913f0 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1636,6 +1636,9 @@ class CondContext(ControlFlowContext): self._values.add(result.name) with ops.control_dependencies(None): result = _SwitchRefOrTensor(result, self._pred)[self._branch] + if self._outer_context: + self._outer_context.AddInnerOp(result.op) + result.op.graph.prevent_fetching(result.op) # pylint: disable=protected-access result.op._set_control_flow_context(self) @@ -1678,6 +1681,9 @@ class CondContext(ControlFlowContext): if self._outer_context or not IsLoopExit(op): op.graph.prevent_fetching(op) + if self._outer_context: + self._outer_context.AddInnerOp(op) + def _ProcessOutputTensor(self, val): """Process an output tensor of a conditional branch.""" real_val = val -- GitLab From b0e751a73d211872f8d937e5778b9e0e0a7b950b Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Thu, 5 Oct 2017 09:45:14 -0700 Subject: [PATCH 058/909] Add dilation rates support for ConvolutionDescriptor... ...in stream executor. In preparation for the support of native cudnn dilated convolution. 
PiperOrigin-RevId: 171165137 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 12 ++++++---- tensorflow/stream_executor/dnn.cc | 15 +++++++++---- tensorflow/stream_executor/dnn.h | 25 +++++++++++++++++++++ 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index fc205f61fa..bf8380ebbd 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -583,6 +583,7 @@ class ScopedConvolutionDescriptor { } const auto& strides64 = convolution_descriptor.strides(); const auto& padding64 = convolution_descriptor.padding(); + const auto& dilations64 = convolution_descriptor.dilations(); if (convolution_descriptor.pad_alignment() == dnn::PadAlignment::kTensorFlowPadding) { LOG(ERROR) << "TensorFlow padding alignment is not supported."; @@ -591,15 +592,19 @@ class ScopedConvolutionDescriptor { // cuDNN requires arrays of ints. std::vector strides(convolution_descriptor.ndims()); std::vector padding(convolution_descriptor.ndims()); + std::vector dilations(convolution_descriptor.ndims()); std::transform(strides64.cbegin(), strides64.cend(), strides.begin(), &CheckedNarrowing); std::transform(padding64.cbegin(), padding64.cend(), padding.begin(), &CheckedNarrowing); - std::vector upscale(convolution_descriptor.ndims(), 1); + // TODO(yangzihao): Test with negative dilation to make sure that cudnn + // doesn't crash. + std::transform(dilations64.cbegin(), dilations64.cend(), dilations.begin(), + &CheckedNarrowing); status = wrap::cudnnSetConvolutionNdDescriptor( parent_, handle_, convolution_descriptor.ndims(), padding.data(), - strides.data(), upscale.data(), + strides.data(), dilations.data(), // NOTE(keveman): cuDNN supports convolution and cross correlation. // However, almost all the use cases do cross correlation, so just // hard coding it here. 
@@ -2982,7 +2987,6 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( if (memory_limit_bytes < 0) { memory_limit_bytes = 0; } - cudnnConvolutionBwdDataAlgo_t algo_to_use; cudnnStatus_t status = wrap::cudnnGetConvolutionBackwardDataAlgorithm( parent_, ToHandle(dnn_handle_), @@ -2995,7 +2999,7 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( /*algo=*/&algo_to_use); CHECK_EQ(status, CUDNN_STATUS_SUCCESS) << "Unable to find a suitable " "algorithm for doing backward " - "filter convolution"; + "data convolution"; return algo_to_use; }; diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index ed9bdf2bc2..2c40e18f5c 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -424,6 +424,7 @@ int64 FilterDescriptor::ComputeWeightCount() const { ConvolutionDescriptor::ConvolutionDescriptor(int ndims) : zero_padding_(ndims, 0), filter_strides_(ndims, 1), + dilation_rates_(ndims, 1), pad_alignment_(PadAlignment::kDefault), ndims_(ndims) {} @@ -435,15 +436,18 @@ ConvolutionDescriptor::~ConvolutionDescriptor() {} string ConvolutionDescriptor::ToString() const { string padding; string strides; + string dilations; for (int i = 0; i < ndims_; i++) { port::Appendf(&padding, "%lld ", zero_padding_[i]); port::Appendf(&strides, "%lld ", filter_strides_[i]); + port::Appendf(&dilations, "%lld ", dilation_rates_[i]); } - return port::Printf("{zero_padding: %s pad_alignment: %s filter_strides: %s}", - padding.c_str(), - PadAlignmentString(pad_alignment_).c_str(), - strides.c_str()); + return port::Printf( + "{zero_padding: %s pad_alignment: %s filter_strides: %s dilation_rates: " + "%s}", + padding.c_str(), PadAlignmentString(pad_alignment_).c_str(), + strides.c_str(), dilations.c_str()); } string ConvolutionDescriptor::ToShortString() const { @@ -455,6 +459,9 @@ string ConvolutionDescriptor::ToShortString() const { for (int i = 0; i < ndims_; i++) { port::Appendf(&desc, "_s%d:%lld", i, filter_strides_[i]); } + for (int i = 0; 
i < ndims_; i++) { + port::Appendf(&desc, "_d%d:%lld", i, dilation_rates_[i]); + } return desc; } diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 4beb46090c..5fe523602a 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -487,6 +487,10 @@ string PadAlignmentString(PadAlignment alignment); // window is moved in the "y dimension" according to this stride value. // - horizontal_filter_stride: analogous to the vertical stride above, but in // the "x dimension". +// - vertical_dilation_rate: there will be (vertical_dilation_rate - 1) skipped +// cells between each filter element in the "y dimension". +// - horizontal_dilation_rate: there will be (horizontal_dilation_rate - 1) +// skipped cells between each filter element in the "x dimension". class ConvolutionDescriptor { public: // By default construction, there is no zero-padding and the filter stride is @@ -523,6 +527,18 @@ class ConvolutionDescriptor { SetDim(&filter_strides_, dim, value); return *this; } + ConvolutionDescriptor& set_vertical_dilation_rate(int64 value) { + SetDim(&dilation_rates_, DimIndex::Y, value); + return *this; + } + ConvolutionDescriptor& set_horizontal_dilation_rate(int64 value) { + SetDim(&dilation_rates_, DimIndex::X, value); + return *this; + } + ConvolutionDescriptor& set_dilation_rate(DimIndex dim, int64 value) { + SetDim(&dilation_rates_, dim, value); + return *this; + } ConvolutionDescriptor& set_pad_alignment(PadAlignment pad_alignment) { pad_alignment_ = pad_alignment; return *this; @@ -539,19 +555,28 @@ class ConvolutionDescriptor { int64 horizontal_filter_stride() const { return GetDim(filter_strides_, DimIndex::X); } + int64 vertical_dilation_rate() const { + return GetDim(dilation_rates_, DimIndex::Y); + } + int64 horizontal_dilation_rate() const { + return GetDim(dilation_rates_, DimIndex::X); + } int zero_padding(DimIndex dim) const { return GetDim(zero_padding_, dim); } int filter_stride(DimIndex dim) 
const { return GetDim(filter_strides_, dim); } + int dilation_rate(DimIndex dim) const { return GetDim(dilation_rates_, dim); } PadAlignment pad_alignment() const { return pad_alignment_; } int ndims() const { return ndims_; } std::vector strides() const { return filter_strides_; } + std::vector dilations() const { return dilation_rates_; } std::vector padding() const { return zero_padding_; } private: // Stored as: .. y, x. std::vector zero_padding_; std::vector filter_strides_; + std::vector dilation_rates_; PadAlignment pad_alignment_; int ndims_; // TODO(leary) cudnn provides these fields, but need to characterize what -- GitLab From 09fa4a4e355171fa30f5793ff9eb1b61a4e34ed0 Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Thu, 5 Oct 2017 09:45:58 -0700 Subject: [PATCH 059/909] Fix ConvBackpropComputeDimensionsV2() interface. PiperOrigin-RevId: 171165222 --- tensorflow/core/kernels/conv_grad_ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/conv_grad_ops.h b/tensorflow/core/kernels/conv_grad_ops.h index 3a3492304b..e068fb8684 100644 --- a/tensorflow/core/kernels/conv_grad_ops.h +++ b/tensorflow/core/kernels/conv_grad_ops.h @@ -248,7 +248,7 @@ Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims, Status ConvBackpropComputeDimensionsV2( StringPiece label, int num_spatial_dims, const TensorShape& input_shape, const TensorShape& filter_shape, const TensorShape& out_backprop_shape, - const std::vector& dilations, const std::vector& strides, + const gtl::ArraySlice& dilations, const std::vector& strides, Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims); } // namespace tensorflow -- GitLab From f97195c6f936ee3edd9ad2620c091b742bb45476 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 5 Oct 2017 09:58:20 -0700 Subject: [PATCH 060/909] Use --config=monolithic for the Android CI build --- tensorflow/tools/ci_build/builds/android_full.sh | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/android_full.sh b/tensorflow/tools/ci_build/builds/android_full.sh index 63250e0a4d..9d449241e8 100755 --- a/tensorflow/tools/ci_build/builds/android_full.sh +++ b/tensorflow/tools/ci_build/builds/android_full.sh @@ -40,7 +40,7 @@ rm -rf ${AAR_LIB_TMP} for CPU in ${CPUS//,/ } do echo "========== Building native libs for Android ${CPU} ==========" - bazel build -c opt --cpu=${CPU} \ + bazel build -c opt --config=monolithic --cpu=${CPU} \ --crosstool_top=//external:android/crosstool \ --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ //tensorflow/core:android_tensorflow_lib \ @@ -62,7 +62,7 @@ done # in assets/ dir (see https://github.com/bazelbuild/bazel/issues/2334) # TODO(gunan): remove extra flags once sandboxing is enabled for all builds. echo "========== Building TensorFlow Android Jar and Demo ==========" -bazel --bazelrc=/dev/null build -c opt --fat_apk_cpu=${CPUS} \ +bazel --bazelrc=/dev/null build -c opt --config=monolithic --fat_apk_cpu=${CPUS} \ --spawn_strategy=sandboxed --genrule_strategy=sandboxed \ //tensorflow/contrib/android:android_tensorflow_inference_java \ //tensorflow/contrib/android:android_tensorflow_inference_java.aar \ -- GitLab From 7e7d55c0f5bae2380a76d39fbc51131f843c0320 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 5 Oct 2017 09:50:17 -0700 Subject: [PATCH 061/909] [tf.data] Iterator and data/nest documentation fixes PiperOrigin-RevId: 171165796 --- tensorflow/python/data/ops/iterator_ops.py | 8 ++++---- tensorflow/python/data/util/nest.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index d11112d004..d4f05a055a 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -138,21 +138,21 @@ class Iterator(object): This method allows you to define a "feedable" iterator where you can choose between concrete iterators by feeding a value in a @{tf.Session.run} call. In that case, `string_handle` would a @{tf.placeholder}, and you would feed - it with the value of @{tf.contrib.data.Iterator.string_handle} in each step. + it with the value of @{tf.data.Iterator.string_handle} in each step. For example, if you had two iterators that marked the current position in a training dataset and a test dataset, you could choose which to use in each step as follows: ```python - train_iterator = tf.contrib.data.Dataset(...).make_one_shot_iterator() + train_iterator = tf.data.Dataset(...).make_one_shot_iterator() train_iterator_handle = sess.run(train_iterator.string_handle()) - test_iterator = tf.contrib.data.Dataset(...).make_one_shot_iterator() + test_iterator = tf.data.Dataset(...).make_one_shot_iterator() test_iterator_handle = sess.run(test_iterator.string_handle()) handle = tf.placeholder(tf.string, shape=[]) - iterator = tf.contrib.data.Iterator.from_string_handle( + iterator = tf.data.Iterator.from_string_handle( handle, train_iterator.output_types) next_element = iterator.get_next() diff --git a/tensorflow/python/data/util/nest.py b/tensorflow/python/data/util/nest.py index 83908d8a0e..421513cafc 100644 --- a/tensorflow/python/data/util/nest.py +++ b/tensorflow/python/data/util/nest.py @@ -106,7 
+106,7 @@ def is_sequence(seq): NOTE(mrry): This differs from `tensorflow.python.util.nest.is_sequence()`, which *does* treat a Python list as a sequence. For ergonomic - reasons, `tf.contrib.data` users would prefer to treat lists as + reasons, `tf.data` users would prefer to treat lists as implict `tf.Tensor` objects, and dicts as (nested) sequences. Args: -- GitLab From 5f97262ae6f36000e141b01b33c55f8eb1ee94a1 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 5 Oct 2017 09:50:49 -0700 Subject: [PATCH 062/909] Splits backprop.py in two files, one of which can be converted to C PiperOrigin-RevId: 171165855 --- tensorflow/python/eager/BUILD | 8 + tensorflow/python/eager/backprop.py | 380 ++++++--------------- tensorflow/python/eager/custom_gradient.py | 2 +- tensorflow/python/eager/function.py | 2 +- tensorflow/python/eager/imperative_grad.py | 227 ++++++++++++ tensorflow/python/framework/ops.py | 2 +- 6 files changed, 335 insertions(+), 286 deletions(-) create mode 100644 tensorflow/python/eager/imperative_grad.py diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 09ec4ee12b..4069ef1c70 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -339,7 +339,9 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ + ":imperative_grad", "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", @@ -425,3 +427,9 @@ filegroup( ), visibility = ["//tensorflow:__subpackages__"], ) + +py_library( + name = "imperative_grad", + srcs = ["imperative_grad.py"], + deps = [":tape"], +) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 1d729cc2e1..3c84cbbd6f 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division 
from __future__ import print_function -import collections import functools import operator import threading @@ -28,6 +27,7 @@ import six from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import execute +from tensorflow.python.eager import imperative_grad from tensorflow.python.eager import tape from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -36,288 +36,10 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops -from tensorflow.python.util import tf_contextlib +from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect -# If over MIN_AGGREGATE_COUNT gradients are accumulated and the total -# memory consumption is over MIN_AGGREGATE_BYTES, do an early aggregation -# so as to release the gradient tensor to save memory. -_MIN_AGGREGATE_COUNT = 4 -_MIN_AGGREGATE_BYTES = 128 * 1024 * 1024 - -# Terminology: -# -# - op: a possibly composite operation, which has an entry in the tape -# - target: dy in dx/dy -# - source: dx in dx/dy -# - tensor: one of the many inputs or outputs of an operation -# -# Below here we do the gradient algorithm. It works as follows: -# -# First we filter the tape to just the subset of operations we want to -# differentiate. In the process of doing so we count how many times each Tensor -# is used as an input to an op (so we know when we're done computing gradients -# for that Tensor). We also count, for each tape entry, how many of its output -# Tensors need gradients to be computed (Tensors which are not used do not need -# any gradients to be computed). -# -# Finally, we start a backprop stack with a set of tape entries for which we -# have all gradients available. 
This set usually is a subset of the set of -# targets (not all since targets which have outputs in the tape will not have -# gradients available initially). -# -# Then we repeatedly pop an entry from the stack, run its backprop, and update -# the gradients of its inputs. Once we have computed all gradients for a single -# input we can mark this input as done, and this can trigger adding an entry to -# the stack if all outputs of that entry are now done. -# -# When the stack is empty we have gradients for all tensors we're interested in. - - -def _prepare_backprop(target, tensor_to_op, op_to_entry, id_sources): - """Filters the tape to only include relevant entries and counts tensor usages. - - Args: - target: the target to optimize. - tensor_to_op: Map from tensor id to key in op_to_entry that produced it. - op_to_entry: Map from op id to a tape.TapeEntry object - id_sources: the ids of the sources wrt the gradient is being taken. - - Returns: - usage counts (how many entries downstream from a tensor use it) - op_to_entry_map: entry map (a filtered tape, with only the relevant - entries), - missing: map from tensor id to how many downstream gradients still need - to be computed before this tensor's gradient can be computed. - """ - if isinstance(target, (ops.Tensor)): - tensor_stack = [ops.tensor_id(target)] - else: - tensor_stack = list([ops.tensor_id(x) for x in target]) - tensor_usage_counts = {} - o_to_e = {} # Copy of just the bits we need from op_to_entry - while tensor_stack: - t = tensor_stack.pop() - op = tensor_to_op.get(t, None) - # op is None if the tensor is a source (i.e. 
was watched directly) - if op is None or op in o_to_e: - continue - op_trace = op_to_entry[op] - o_to_e[op] = op_trace - for it in op_trace.input_ids: - if it in tensor_usage_counts: - tensor_usage_counts[it] += 1 - else: - tensor_usage_counts[it] = 1 - if it not in id_sources and it in tensor_to_op: - tensor_stack.append(it) - op_missing_tensor_counts = collections.defaultdict(int) - for t in tensor_usage_counts: - if t in tensor_to_op and tensor_to_op[t] is not None: - op_missing_tensor_counts[tensor_to_op[t]] += 1 - return tensor_usage_counts, o_to_e, op_missing_tensor_counts - - -def _initialize_backprop_stack(op_to_entry, op_missing_tensor): - """Returns the set of tape entries which are available for backprop.""" - ready_ops = [] - for op in op_to_entry: - if op not in op_missing_tensor: - ready_ops.append(op) - return ready_ops - - -def _initial_gradients(target, output_gradients, tensor_usage_counts): - """Computes the initial gradients for each Tensor.""" - # Initialize the backprop stack - gradients = collections.defaultdict(list) - if isinstance(target, ops.Tensor): - if output_gradients is not None: - output_gradient = output_gradients - else: - output_gradient = array_ops.ones_like(target) - gradients[ops.tensor_id(target)].append(output_gradient) - else: - for i, t in enumerate(target): - if ops.tensor_id(t) in tensor_usage_counts: - # Can't provide a gradient of something we're trying to differentiate - assert output_gradients is None or output_gradients[i] is None - else: - if output_gradients is None or output_gradients[i] is None: - out_grad = array_ops.ones_like(t) - else: - out_grad = output_gradients[i] - gradients[ops.tensor_id(t)].append(out_grad) - return gradients - - -@tf_contextlib.contextmanager -def _no_op(): - yield - - -def _aggregate_grads(gradients): - """Aggregate gradients from multiple sources. - - Args: - gradients: A list of 'Tensor' or 'IndexedSlices' gradients. 
- - Returns: - If 'gradients' only has 'Tensor', returns an aggregated 'Tensor'. - Otherwise returns an aggregated 'IndexedSlices'. - """ - assert gradients, "No gradients to aggregate" - - if len(gradients) == 1: - return gradients[0] - if all([isinstance(g, ops.Tensor) for g in gradients]): - return math_ops.add_n(gradients) - else: - assert all([isinstance(g, (ops.Tensor, ops.IndexedSlices)) - for g in gradients]) - indexed_slices_list = [] - for grad in gradients: - # TODO(xpan): Support nested IndexedSlices and core IndexedSlices - if isinstance(grad, ops.Tensor): - indexed_slices = ops.IndexedSlices( - grad, - constant_op.constant(range(grad.shape[0])), - constant_op.constant(grad.shape.as_list())) - indexed_slices_list.append(indexed_slices) - else: - indexed_slices_list.append(grad) - - # Dense shapes from all gradients should be the same. - dense_shape = indexed_slices_list[0].dense_shape - # For simplicity now, always cast to int64. - indices = array_ops.concat([math_ops.cast(x.indices, dtypes.int64) - for x in indexed_slices_list], 0) - values = array_ops.concat([x.values for x in indexed_slices_list], 0) - return ops.IndexedSlices(values, indices, dense_shape) - - -def _add_new_grads(gradients, gradients_size, tid, grad): - """Adds a new gradient and maybe aggregate the gradients. - - Args: - gradients: A dict map from tensor id to list of gradients. - gradients_size: A dict map from tensor id to its total units. Might - not be initialized. - tid: Tensor id. - grad: New gradient for the `tid`, either a Tensor or IndexedSlices. - - Raises: - ValueError: if `grad` is neight Tensor nor IndexedSlices. 
- """ - tensor_grads = gradients[tid] - tensor_grads.append(grad) - if len(tensor_grads) < _MIN_AGGREGATE_COUNT: - return - elif tid not in gradients_size: - if isinstance(grad, ops.Tensor): - size = functools.reduce(operator.mul, grad._shape_tuple(), 1) # pylint: disable=protected-access - elif isinstance(grad, ops.IndexedSlices): - size = functools.reduce(operator.mul, grad.values._shape_tuple(), 1) # pylint: disable=protected-access - else: - raise ValueError("Unexpected gradient type: %s" % type(grad)) - gradients_size[tid] = size - else: - size = gradients_size[tid] - - # For simplicity, assume each element to be 4 bytes now. - if len(tensor_grads) * size * 4 > _MIN_AGGREGATE_BYTES: - gradients[tid] = [_aggregate_grads(tensor_grads)] - - -def imperative_grad( - target, - sources, - output_gradients=None): - """Computes gradients from the imperatively defined tape on top of the stack. - - Works by filtering the tape, computing how many downstream usages are of each - tensor and entry, and repeatedly applying backward functions until we have - gradients for all sources. - - Args: - target: either a Tensor or list of Tensors to be differentiated. - sources: list of Tensors for which we want gradients - output_gradients: if not None, a list of gradient provided for each Target, - or None if we are to use the target's computed downstream gradient. - - Returns: - the gradient wrt each of the sources. - - Raises: - RuntimeError: if something goes wrong. - ValueError: if there is no sequence of differentiable operations connecting - a source and any target Tensor. This can happen either if the target is - not computed based on the source, if the tracing was set up incorrectly, - or if only non-differentiable functions of the source were used in the - computation of target. 
- """ - if not tape._tape_stack.stack: # pylint: disable=protected-access - raise RuntimeError("Computing a gradient with no tape present") - bp_tape = tape.pop_tape() - tensor_to_op, op_to_entry = bp_tape.export() - # This overwrites the op_to_entry variable, which will release all memory used - # to keep traces that are irrelevant to the gradient computation we're doing - # here. - id_sources = [ops.tensor_id(t) for t in sources] - tensor_usage_counts, op_to_entry, op_missing_tensor = _prepare_backprop( - target, tensor_to_op, op_to_entry, id_sources) - ready_ops = _initialize_backprop_stack(op_to_entry, op_missing_tensor) - gradients = _initial_gradients(target, output_gradients, - tensor_usage_counts) - gradients_size = dict() - # Now exhaust the backprop stack - while ready_ops: - op = ready_ops.pop() - op_trace = op_to_entry.pop(op) - out_gradients = [gradients.pop(t, None) for t in op_trace.output_ids] - for i in range(len(out_gradients)): - if out_gradients[i] is None: - # TODO(apassos) this should be in the right device - none_indices = _grad_fn_accepts_none_for_indices.get( - op_trace.op_type, None) - if none_indices is None or i not in none_indices: - out_gradients[i] = array_ops.zeros( - *op_trace.output_shape_and_dtype[i]) - else: - out_gradients[i] = _aggregate_grads(out_gradients[i]) - - in_gradients = op_trace.backward_function( - *(out_gradients + op_trace.side_outputs)) - in_gradients = ([in_gradients] - if isinstance(in_gradients, (ops.Tensor, - ops.IndexedSlices, - type(None))) - else in_gradients) - for i, t in enumerate(op_trace.input_ids): - if in_gradients[i] is not None: - _add_new_grads(gradients, gradients_size, t, in_gradients[i]) - if tensor_usage_counts.get(t, 0) > 0: - tensor_usage_counts[t] -= 1 - if (t in tensor_to_op - and tensor_usage_counts[t] == 0 - and t not in id_sources): - in_op = tensor_to_op[t] - if in_op is None: - continue - if op_missing_tensor.get(in_op, 0) > 0: - op_missing_tensor[in_op] -= 1 - if 
op_missing_tensor.get(in_op, 0) == 0: - ready_ops.append(in_op) - result = [] - for i, s in enumerate(sources): - g = gradients.get(ops.tensor_id(s), None) - if g is None: - result.append(None) - else: - result.append(_aggregate_grads(g)) - return result - _op_attr_type_cache = {} @@ -557,7 +279,7 @@ def _record_gradient(op_name, inputs, attrs, results, name): if _tracing: print("Gradient for", (name if name else op_name), "inputs", op_inputs, "output_grads", orig_outputs, "gradients", result) - return result + return nest.flatten(result) tape.record_operation(op_name, results, inputs, [], grad_fn) if _tracing: @@ -615,7 +337,9 @@ def implicit_val_and_grad(f): end_node = f(*args) variables = tape.top_tape_watched_variables() sources = [x.handle for x in variables] - grad = imperative_grad(end_node, sources) + grad = imperative_grad.imperative_grad(_default_vspace, + nest.flatten(end_node), + sources) return end_node, list(zip(grad, variables)) return grad_fn @@ -849,6 +573,96 @@ def val_and_grad_function(f, params=None): sources.append(args[i]) tape.watch(args[i]) result = f(*args) - return result, imperative_grad(result, sources, output_gradients=dy) + return result, imperative_grad.imperative_grad( + _default_vspace, nest.flatten(result), sources, + output_gradients=nest.flatten(dy) if dy is not None else None) return decorated + + +def _aggregate_grads(gradients): + """Aggregate gradients from multiple sources. + + Args: + gradients: A list of 'Tensor' or 'IndexedSlices' gradients. + + Returns: + If 'gradients' only has 'Tensor', returns an aggregated 'Tensor'. + Otherwise returns an aggregated 'IndexedSlices'. 
+ """ + assert gradients, "No gradients to aggregate" + + if len(gradients) == 1: + return gradients[0] + if all([isinstance(g, ops.Tensor) for g in gradients]): + return math_ops.add_n(gradients) + else: + assert all([isinstance(g, (ops.Tensor, ops.IndexedSlices)) + for g in gradients]) + indexed_slices_list = [] + for grad in gradients: + # TODO(xpan): Support nested IndexedSlices and core IndexedSlices + if isinstance(grad, ops.Tensor): + indexed_slices = ops.IndexedSlices( + grad, + constant_op.constant(range(grad.shape[0])), + constant_op.constant(grad.shape.as_list())) + indexed_slices_list.append(indexed_slices) + else: + indexed_slices_list.append(grad) + + # Dense shapes from all gradients should be the same. + dense_shape = indexed_slices_list[0].dense_shape + # For simplicity now, always cast to int64. + indices = array_ops.concat([math_ops.cast(x.indices, dtypes.int64) + for x in indexed_slices_list], 0) + values = array_ops.concat([x.values for x in indexed_slices_list], 0) + return ops.IndexedSlices(values, indices, dense_shape) + + +# If over MIN_AGGREGATE_COUNT gradients are accumulated and the total +# memory consumption is over MIN_AGGREGATE_BYTES, do an early aggregation +# so as to release the gradient tensor to save memory. +_MIN_AGGREGATE_COUNT = 4 +_MIN_AGGREGATE_BYTES = 128 * 1024 * 1024 + + +def _add_new_grads(gradients, gradients_size, tid, grad): + """Adds a new gradient and maybe aggregate the gradients. + + Args: + gradients: A dict map from tensor id to list of gradients. + gradients_size: A dict map from tensor id to its total units. Might + not be initialized. + tid: Tensor id. + grad: New gradient for the `tid`, either a Tensor or IndexedSlices. + + Raises: + ValueError: if `grad` is neight Tensor nor IndexedSlices. 
+ """ + tensor_grads = gradients[tid] + tensor_grads.append(grad) + if len(tensor_grads) < _MIN_AGGREGATE_COUNT: + return + elif tid not in gradients_size: + if isinstance(grad, ops.Tensor): + size = functools.reduce(operator.mul, grad._shape_tuple(), 1) # pylint: disable=protected-access + elif isinstance(grad, ops.IndexedSlices): + size = functools.reduce(operator.mul, grad.values._shape_tuple(), 1) # pylint: disable=protected-access + else: + raise ValueError("Unexpected gradient type: %s" % type(grad)) + gradients_size[tid] = size + else: + size = gradients_size[tid] + + # For simplicity, assume each element to be 4 bytes now. + if len(tensor_grads) * size * 4 > _MIN_AGGREGATE_BYTES: + gradients[tid] = [_aggregate_grads(tensor_grads)] + + +_default_vspace = imperative_grad.VSpace( + add_new_grads_fn=_add_new_grads, + aggregate_fn=_aggregate_grads, + tensor_id=ops.tensor_id, + zeros=array_ops.zeros, + ones_like=array_ops.ones_like) diff --git a/tensorflow/python/eager/custom_gradient.py b/tensorflow/python/eager/custom_gradient.py index 67c9015bf0..4360e53225 100644 --- a/tensorflow/python/eager/custom_gradient.py +++ b/tensorflow/python/eager/custom_gradient.py @@ -78,7 +78,7 @@ def custom_gradient(f): # second derivative this way if they capture any output tensors. Change the # signature of custom_gradient. 
def actual_grad_fn(*outputs): - return grad_fn(*outputs) + return nest.flatten(grad_fn(*outputs)) flat_result = nest.flatten(result) tape.record_operation( diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index cb70d23f06..6ffc914f73 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -88,7 +88,7 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False): else: captured_value = captured_value[1] tape.record_operation("captured_value", [captured_value], [value], [], - lambda x: x) + lambda x: [x]) return captured_value diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py new file mode 100644 index 0000000000..b81f5bba14 --- /dev/null +++ b/tensorflow/python/eager/imperative_grad.py @@ -0,0 +1,227 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Code for backpropagation using the tape utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from tensorflow.python.eager import tape + + +# Terminology: +# +# - op: a possibly composite operation, which has an entry in the tape +# - target: dy in dx/dy +# - source: dx in dx/dy +# - tensor: one of the many inputs or outputs of an operation +# +# Below here we do the gradient algorithm. It works as follows: +# +# First we filter the tape to just the subset of operations we want to +# differentiate. In the process of doing so we count how many times each Tensor +# is used as an input to an op (so we know when we're done computing gradients +# for that Tensor). We also count, for each tape entry, how many of its output +# Tensors need gradients to be computed (Tensors which are not used do not need +# any gradients to be computed). +# +# Finally, we start a backprop stack with a set of tape entries for which we +# have all gradients available. This set usually is a subset of the set of +# targets (not all since targets which have outputs in the tape will not have +# gradients available initially). +# +# Then we repeatedly pop an entry from the stack, run its backprop, and update +# the gradients of its inputs. Once we have computed all gradients for a single +# input we can mark this input as done, and this can trigger adding an entry to +# the stack if all outputs of that entry are now done. +# +# When the stack is empty we have gradients for all tensors we're interested in. +def _prepare_backprop(vspace, target, tensor_to_op, op_to_entry, id_sources): + """Filters the tape to only include relevant entries and counts tensor usages. + + Args: + vspace: information about the space we're differentiating in. + target: the target to optimize. 
+ tensor_to_op: Map from tensor id to key in op_to_entry that produced it. + op_to_entry: Map from op id to a tape.TapeEntry object + id_sources: the ids of the sources wrt the gradient is being taken. + + Returns: + usage counts (how many entries downstream from a tensor use it) + op_to_entry_map: entry map (a filtered tape, with only the relevant + entries), + missing: map from tensor id to how many downstream gradients still need + to be computed before this tensor's gradient can be computed. + """ + tensor_stack = [vspace.tensor_id(x) for x in target] + tensor_usage_counts = {} + o_to_e = {} # Copy of just the bits we need from op_to_entry + while tensor_stack: + t = tensor_stack.pop() + op = tensor_to_op.get(t, None) + # op is None if the tensor is a source (i.e. was watched directly) + if op is None or op in o_to_e: + continue + op_trace = op_to_entry[op] + o_to_e[op] = op_trace + for it in op_trace.input_ids: + if it in tensor_usage_counts: + tensor_usage_counts[it] += 1 + else: + tensor_usage_counts[it] = 1 + if it not in id_sources and it in tensor_to_op: + tensor_stack.append(it) + op_missing_tensor_counts = collections.defaultdict(int) + for t in tensor_usage_counts: + if t in tensor_to_op and tensor_to_op[t] is not None: + op_missing_tensor_counts[tensor_to_op[t]] += 1 + return tensor_usage_counts, o_to_e, op_missing_tensor_counts + + +def _initialize_backprop_stack(op_to_entry, op_missing_tensor): + """Returns the set of tape entries which are available for backprop.""" + ready_ops = [] + for op in op_to_entry: + if op not in op_missing_tensor: + ready_ops.append(op) + return ready_ops + + +def _initial_gradients(vspace, target, output_gradients, tensor_usage_counts): + """Computes the initial gradients for each Tensor.""" + # Initialize the backprop stack + gradients = collections.defaultdict(list) + for i, t in enumerate(target): + if vspace.tensor_id(t) in tensor_usage_counts: + # Can't provide a gradient of something we're trying to differentiate + 
assert output_gradients is None or output_gradients[i] is None + else: + if output_gradients is None or output_gradients[i] is None: + out_grad = vspace.ones_like(t) + else: + out_grad = output_gradients[i] + gradients[vspace.tensor_id(t)].append(out_grad) + return gradients + + +VSpace = collections.namedtuple( + "VSpace", + ["add_new_grads_fn", "aggregate_fn", "tensor_id", "zeros", "ones_like"]) + + +def imperative_grad( + vspace, + target, + sources, + output_gradients=None): + """Computes gradients from the imperatively defined tape on top of the stack. + + Works by filtering the tape, computing how many downstream usages are of each + tensor and entry, and repeatedly applying backward functions until we have + gradients for all sources. + + Args: + vspace: the vector space in which to differentiate. + target: either a Tensor or list of Tensors to be differentiated. + sources: list of Tensors for which we want gradients + output_gradients: if not None, a list of gradient provided for each Target, + or None if we are to use the target's computed downstream gradient. + + Returns: + the gradient wrt each of the sources. + + Raises: + RuntimeError: if something goes wrong. + ValueError: if there is no sequence of differentiable operations connecting + a source and any target Tensor. This can happen either if the target is + not computed based on the source, if the tracing was set up incorrectly, + or if only non-differentiable functions of the source were used in the + computation of target. + """ + if not tape._tape_stack.stack: # pylint: disable=protected-access + raise RuntimeError("Computing a gradient with no tape present") + bp_tape = tape.pop_tape() + tensor_to_op, op_to_entry = bp_tape.export() + # This overwrites the op_to_entry variable, which will release all memory used + # to keep traces that are irrelevant to the gradient computation we're doing + # here. 
+ id_sources = [vspace.tensor_id(t) for t in sources] + tensor_usage_counts, op_to_entry, op_missing_tensor = _prepare_backprop( + vspace, target, tensor_to_op, op_to_entry, id_sources) + ready_ops = _initialize_backprop_stack(op_to_entry, op_missing_tensor) + gradients = _initial_gradients(vspace, target, output_gradients, + tensor_usage_counts) + gradients_size = dict() + # Now exhaust the backprop stack + while ready_ops: + op = ready_ops.pop() + op_trace = op_to_entry.pop(op) + out_gradients = [gradients.pop(t, None) for t in op_trace.output_ids] + for i in range(len(out_gradients)): + if out_gradients[i] is None: + # TODO(apassos) this should be in the right device + none_indices = _grad_fn_accepts_none_for_indices.get( + op_trace.op_type, None) + if none_indices is None or i not in none_indices: + out_gradients[i] = vspace.zeros( + *op_trace.output_shape_and_dtype[i]) + else: + out_gradients[i] = vspace.aggregate_fn(out_gradients[i]) + + in_gradients = op_trace.backward_function( + *(out_gradients + op_trace.side_outputs)) + for i, t in enumerate(op_trace.input_ids): + if in_gradients[i] is not None: + vspace.add_new_grads_fn(gradients, gradients_size, t, in_gradients[i]) + if tensor_usage_counts.get(t, 0) > 0: + tensor_usage_counts[t] -= 1 + if (t in tensor_to_op + and tensor_usage_counts[t] == 0 + and t not in id_sources): + in_op = tensor_to_op[t] + if in_op is None: + continue + if op_missing_tensor.get(in_op, 0) > 0: + op_missing_tensor[in_op] -= 1 + if op_missing_tensor.get(in_op, 0) == 0: + ready_ops.append(in_op) + result = [] + for i, s in enumerate(sources): + g = gradients.get(vspace.tensor_id(s), None) + if g is None: + result.append(None) + else: + result.append(vspace.aggregate_fn(g)) + return result + + +# TODO(agarwal): use an automatic mechanism for handling None arguments to +# gradient functions. +# Some gradient functions can accept None arguments for gradients. 
The following +# maps the operation name to the indices at which the corresponding gradient +# function can accept None values. +# e.g. FusedBatchNorm outputs 5 values and hence receives 5 gradient values +# during backprop. However the gradient function uses only the first of those +# values and ignores the rest. The entry, "FusedBatchNorm": [1, 2, 3, 4], +# indicates that only the gradient corresponding to index 0 is used, and the +# gradient values at indices 1-4 are ignored (and hence can be None). The +# backprop algorithm can then leverage this by not constructing zeros to +# pass for those indices. +_grad_fn_accepts_none_for_indices = { + "SoftmaxCrossEntropyWithLogits": [1], + "FusedBatchNorm": [1, 2, 3, 4] +} diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 50aa070985..ae84297690 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -675,7 +675,7 @@ class _EagerTensorBase(Tensor): if not context.in_graph_mode(): self_device = self.device def grad_fun(dresult): - return dresult._copy(device_name=self_device) + return [dresult._copy(device_name=self_device)] tape.record_operation("_copy", [new_tensor], [self], [], grad_fun) return new_tensor # pylint: enable=protected-access -- GitLab From c49eeeee5463aff02b4bafbd1596288ba4b27739 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 5 Oct 2017 09:54:37 -0700 Subject: [PATCH 063/909] Add a Cython build dependency, start using some Cython tensor utilities PiperOrigin-RevId: 171166294 --- .../core/platform/default/build_config.bzl | 71 +++++++++++- tensorflow/python/BUILD | 11 +- .../python/framework/fast_tensor_util.pyx | 103 ++++++++++++++++++ tensorflow/python/framework/tensor_util.py | 3 +- tensorflow/workspace.bzl | 11 ++ third_party/cython.BUILD | 28 +++++ 6 files changed, 222 insertions(+), 5 deletions(-) create mode 100644 tensorflow/python/framework/fast_tensor_util.pyx create mode 100644 third_party/cython.BUILD diff 
--git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 8a67951b24..51d37291ee 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -28,6 +28,76 @@ def tf_deps(deps, suffix): return tf_deps +# Modified from @cython//:Tools/rules.bzl +def pyx_library( + name, + deps=[], + py_deps=[], + srcs=[], + **kwargs): + """Compiles a group of .pyx / .pxd / .py files. + + First runs Cython to create .cpp files for each input .pyx or .py + .pxd + pair. Then builds a shared object for each, passing "deps" to each cc_binary + rule (includes Python headers by default). Finally, creates a py_library rule + with the shared objects and any pure Python "srcs", with py_deps as its + dependencies; the shared objects can be imported like normal Python files. + + Args: + name: Name for the rule. + deps: C/C++ dependencies of the Cython (e.g. Numpy headers). + py_deps: Pure Python dependencies of the final library. + srcs: .py, .pyx, or .pxd files to either compile or pass through. + **kwargs: Extra keyword arguments passed to the py_library. + """ + # First filter out files that should be run compiled vs. passed through. + py_srcs = [] + pyx_srcs = [] + pxd_srcs = [] + for src in srcs: + if src.endswith(".pyx") or (src.endswith(".py") + and src[:-3] + ".pxd" in srcs): + pyx_srcs.append(src) + elif src.endswith(".py"): + py_srcs.append(src) + else: + pxd_srcs.append(src) + if src.endswith("__init__.py"): + pxd_srcs.append(src) + + # Invoke cython to produce the shared object libraries. + cpp_outs = [src.split(".")[0] + ".cpp" for src in pyx_srcs] + native.genrule( + name = name + "_cython_translation", + srcs = pyx_srcs, + outs = cpp_outs, + cmd = ("PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS)" + # Rename outputs to expected location. 
+ + """ && python -c 'import shutil, sys; n = len(sys.argv); [shutil.copyfile(src.split(".")[0] + ".cpp", dst) for src, dst in zip(sys.argv[1:], sys.argv[1+n//2:])]' $(SRCS) $(OUTS)"""), + tools = ["@cython//:cython_binary"] + pxd_srcs, + ) + + shared_objects = [] + for src in pyx_srcs: + stem = src.split(".")[0] + shared_object_name = stem + ".so" + native.cc_binary( + name=shared_object_name, + srcs=[stem + ".cpp"], + deps=deps + ["//util/python:python_headers"], + linkshared = 1, + ) + shared_objects.append(shared_object_name) + + # Now create a py_library with these shared objects as data. + native.py_library( + name=name, + srcs=py_srcs, + deps=py_deps, + srcs_version = "PY2AND3", + data=shared_objects, + **kwargs + ) def _proto_cc_hdrs(srcs, use_grpc_plugin=False): ret = [s[:-len(".proto")] + ".pb.h" for s in srcs] @@ -299,7 +369,6 @@ def tf_additional_proto_srcs(): def tf_additional_all_protos(): return ["//tensorflow/core:protos_all"] - def tf_protos_all_impl(): return ["//tensorflow/core:protos_all_cc_impl"] diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 3e846cd18a..407ff079c1 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -24,6 +24,7 @@ load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc") load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object") load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_tests") +load("//tensorflow/core:platform/default/build_config.bzl", "pyx_library") load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library_py") load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_lib_deps") @@ -503,6 +504,7 @@ py_library( ":common_shapes", ":cpp_shape_inference_proto_py", ":errors", + ":framework_fast_tensor_util", ":framework_for_generated_wrappers", ":function", ":graph_util", @@ -733,8 +735,6 @@ py_library( ], ) -# 
load("//third_party/py/cython:build_defs.bzl", "pyx_library") - py_library( name = "extra_py_tests_deps", srcs_version = "PY2AND3", @@ -4358,3 +4358,10 @@ py_test( "//third_party/py/numpy", ], ) + +pyx_library( + name = "framework_fast_tensor_util", + srcs = ["framework/fast_tensor_util.pyx"], + py_deps = ["//tensorflow/python:util"], + deps = ["//third_party/py/numpy:headers"], +) diff --git a/tensorflow/python/framework/fast_tensor_util.pyx b/tensorflow/python/framework/fast_tensor_util.pyx new file mode 100644 index 0000000000..b43ddb4ad3 --- /dev/null +++ b/tensorflow/python/framework/fast_tensor_util.pyx @@ -0,0 +1,103 @@ +#cython: boundscheck=False +#cython: wraparound=False +#cython: infer_types=True +import numpy as np +cimport numpy as np + +from tensorflow.python.util import compat + + +def AppendFloat32ArrayToTensorProto( + tensor_proto, np.ndarray[np.float32_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.float_val.append(nparray[i]) + + +def AppendFloat64ArrayToTensorProto( + tensor_proto, np.ndarray[np.float64_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.double_val.append(nparray[i]) + + +def AppendInt32ArrayToTensorProto( + tensor_proto, np.ndarray[np.int32_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.int_val.append(nparray[i]) + + +def AppendInt64ArrayToTensorProto( + tensor_proto, np.ndarray[np.int64_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.int64_val.append(nparray[i]) + + +def AppendUInt8ArrayToTensorProto( + tensor_proto, np.ndarray[np.uint8_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.int_val.append(nparray[i]) + + +def AppendUInt16ArrayToTensorProto( + tensor_proto, np.ndarray[np.uint16_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.int_val.append(nparray[i]) + + 
+def AppendInt16ArrayToTensorProto( + tensor_proto, np.ndarray[np.int16_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.int_val.append(nparray[i]) + + +def AppendInt8ArrayToTensorProto( + tensor_proto, np.ndarray[np.int8_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.int_val.append(nparray[i]) + + +def AppendComplex64ArrayToTensorProto( + tensor_proto, np.ndarray[np.complex64_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.scomplex_val.append(nparray[i].real) + tensor_proto.scomplex_val.append(nparray[i].imag) + + +def AppendComplex128ArrayToTensorProto( + tensor_proto, np.ndarray[np.complex128_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.dcomplex_val.append(nparray[i].real) + tensor_proto.dcomplex_val.append(nparray[i].imag) + + +def AppendObjectArrayToTensorProto(tensor_proto, np.ndarray nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.string_val.append(compat.as_bytes(nparray[i])) + + +def AppendBoolArrayToTensorProto(tensor_proto, nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.bool_val.append(np.asscalar(nparray[i])) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 335db92a73..414c61e930 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -27,8 +27,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.util import compat -# TODO(opensource): Add support for pyx_library in the open-source build. -# For now, we use the slow versions that fast_tensor_util replaces. +# Fallback in case fast_tensor_util is not properly compiled. 
# pylint: disable=g-import-not-at-top try: from tensorflow.python.framework import fast_tensor_util diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index f33a942dc9..b226184261 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -713,6 +713,17 @@ def tf_workspace(path_prefix="", tf_repo_name=""): actual = "@cub_archive//:cub", ) + native.new_http_archive( + name = "cython", + sha256 = "6dcd30b5ceb887b2b965ee7ceb82ea3acb5f0642fe2206c7636b45acea4798e5", + urls = [ + "http://mirror.bazel.build/github.com/cython/cython/archive/3732784c45cfb040a5b0936951d196f83a12ea17.tar.gz", + "https://github.com/cython/cython/archive/3732784c45cfb040a5b0936951d196f83a12ea17.tar.gz", + ], + strip_prefix = "cython-3732784c45cfb040a5b0936951d196f83a12ea17", + build_file = str(Label("//third_party:cython.BUILD")), + ) + native.http_archive( name = "bazel_toolchains", urls = [ diff --git a/third_party/cython.BUILD b/third_party/cython.BUILD new file mode 100644 index 0000000000..a8e72a1e36 --- /dev/null +++ b/third_party/cython.BUILD @@ -0,0 +1,28 @@ +# Modified version of @cython//:BUILD.bazel + +py_library( + name = "cython_lib", + srcs = glob( + ["Cython/**/*.py"], + exclude = [ + "**/Tests/*.py", + ], + ) + ["cython.py"], + data = glob([ + "Cython/**/*.pyx", + "Cython/Utility/*.*", + "Cython/Includes/**/*.pxd", + ]), + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], +) + +# May not be named "cython", since that conflicts with Cython/ on OSX +py_binary( + name = "cython_binary", + srcs = ["cython.py"], + main = "cython.py", + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = ["cython_lib"], +) -- GitLab From 376147cd71d1a240dad428c3ff82ca4ea5f4e88e Mon Sep 17 00:00:00 2001 From: RJ Ryan Date: Thu, 5 Oct 2017 10:03:12 -0700 Subject: [PATCH 064/909] Save an unnecessary logical_not in the maximum/minimum gradient. 
PiperOrigin-RevId: 171167415 --- tensorflow/cc/gradients/math_grad.cc | 2 +- tensorflow/python/ops/math_grad.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index ac288b1d83..2417bf18a9 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -484,7 +484,7 @@ Status MaximumMinimumGradCommon(const Scope& scope, const Operation& op, auto grad = grad_inputs[0]; auto zeros = ZerosLike(scope, grad); auto gx_1 = Where3(scope, comparator, grad, zeros); - auto gx_2 = Where3(scope, LogicalNot(scope, comparator), grad, zeros); + auto gx_2 = Where3(scope, comparator, zeros, grad); return BinaryGradCommon(scope, op, grad_outputs, gx_1, gx_2); } diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index ee9cbda0c0..d36d66f899 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -851,7 +851,7 @@ def _MaximumMinimumGrad(op, grad, selector_op): xmask = selector_op(x, y) rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) xgrad = array_ops.where(xmask, grad, zeros) - ygrad = array_ops.where(math_ops.logical_not(xmask), grad, zeros) + ygrad = array_ops.where(xmask, zeros, grad) gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx) gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy) return (gx, gy) -- GitLab From 23227f038d909d4f415683d4cf2a62a68d774b2c Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Thu, 5 Oct 2017 10:16:17 -0700 Subject: [PATCH 065/909] Add tf.contrib.distributions.MixtureSameFamily.log_cdf. 
PiperOrigin-RevId: 171169340 --- tensorflow/contrib/distributions/BUILD | 2 + .../kernel_tests/mixture_same_family_test.py | 88 ++++++++++++------- .../python/ops/mixture_same_family.py | 8 ++ 3 files changed, 65 insertions(+), 33 deletions(-) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index aef73f0598..dcdfbbeba2 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -305,6 +305,8 @@ cuda_py_test( additional_deps = [ ":distributions_py", "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:math_ops", "//tensorflow/python:client_testlib", ], ) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py b/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py index 47ac412500..ee4f989dac 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/mixture_same_family_test.py @@ -23,67 +23,75 @@ import numpy as np from tensorflow.contrib.distributions.python.ops import mixture_same_family as mixture_same_family_lib from tensorflow.contrib.distributions.python.ops import mvn_diag as mvn_diag_lib from tensorflow.contrib.distributions.python.ops import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import bernoulli as bernoulli_lib from tensorflow.python.ops.distributions import categorical as categorical_lib from tensorflow.python.ops.distributions import normal as normal_lib from tensorflow.python.platform import test -class MixtureSameFamilyTest( - test_util.VectorDistributionTestHelpers, test.TestCase): +class MixtureSameFamilyTest(test_util.VectorDistributionTestHelpers, + test.TestCase): def testSampleAndLogProbUnivariateShapes(self): with self.test_session(): gm = 
mixture_same_family_lib.MixtureSameFamily( - mixture_distribution=categorical_lib.Categorical( - probs=[0.3, 0.7]), + mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]), components_distribution=normal_lib.Normal( - loc=[-1., 1], - scale=[0.1, 0.5])) - x = gm.sample([4, 5]) + loc=[-1., 1], scale=[0.1, 0.5])) + x = gm.sample([4, 5], seed=42) log_prob_x = gm.log_prob(x) self.assertEqual([4, 5], x.shape) self.assertEqual([4, 5], log_prob_x.shape) def testSampleAndLogProbShapesBroadcastMix(self): mix_probs = np.float32([.3, .7]) - bern_probs = np.float32([[.4, .6], - [.25, .75]]) + bern_probs = np.float32([[.4, .6], [.25, .75]]) with self.test_session(): bm = mixture_same_family_lib.MixtureSameFamily( - mixture_distribution=categorical_lib.Categorical( - probs=mix_probs), - components_distribution=bernoulli_lib.Bernoulli( - probs=bern_probs)) - x = bm.sample([4, 5]) + mixture_distribution=categorical_lib.Categorical(probs=mix_probs), + components_distribution=bernoulli_lib.Bernoulli(probs=bern_probs)) + x = bm.sample([4, 5], seed=42) log_prob_x = bm.log_prob(x) x_ = x.eval() self.assertEqual([4, 5, 2], x.shape) self.assertEqual([4, 5, 2], log_prob_x.shape) - self.assertAllEqual(np.ones_like(x_, dtype=np.bool), - np.logical_or(x_ == 0., x_ == 1.)) + self.assertAllEqual( + np.ones_like(x_, dtype=np.bool), np.logical_or(x_ == 0., x_ == 1.)) def testSampleAndLogProbMultivariateShapes(self): with self.test_session(): gm = mixture_same_family_lib.MixtureSameFamily( - mixture_distribution=categorical_lib.Categorical( - probs=[0.3, 0.7]), + mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]), components_distribution=mvn_diag_lib.MultivariateNormalDiag( - loc=[[-1., 1], [1, -1]], - scale_identity_multiplier=[1., 0.5])) - x = gm.sample([4, 5]) + loc=[[-1., 1], [1, -1]], scale_identity_multiplier=[1., 0.5])) + x = gm.sample([4, 5], seed=42) log_prob_x = gm.log_prob(x) self.assertEqual([4, 5, 2], x.shape) self.assertEqual([4, 5], log_prob_x.shape) + 
def testSampleAndLogProbBatchMultivariateShapes(self): + with self.test_session(): + gm = mixture_same_family_lib.MixtureSameFamily( + mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]), + components_distribution=mvn_diag_lib.MultivariateNormalDiag( + loc=[[[-1., 1], + [1, -1]], + [[0., 1], + [1, 0]]], + scale_identity_multiplier=[1., 0.5])) + x = gm.sample([4, 5], seed=42) + log_prob_x = gm.log_prob(x) + self.assertEqual([4, 5, 2, 2], x.shape) + self.assertEqual([4, 5, 2], log_prob_x.shape) + def testSampleConsistentLogProb(self): with self.test_session() as sess: gm = mixture_same_family_lib.MixtureSameFamily( - mixture_distribution=categorical_lib.Categorical( - probs=[0.3, 0.7]), + mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]), components_distribution=mvn_diag_lib.MultivariateNormalDiag( - loc=[[-1., 1], [1, -1]], - scale_identity_multiplier=[1., 0.5])) + loc=[[-1., 1], [1, -1]], scale_identity_multiplier=[1., 0.5])) # Ball centered at component0's mean. 
self.run_test_sample_consistent_log_prob( sess, gm, radius=1., center=[-1., 1], rtol=0.02) @@ -91,26 +99,40 @@ class MixtureSameFamilyTest( self.run_test_sample_consistent_log_prob( sess, gm, radius=1., center=[1., -1], rtol=0.02) + def testLogCdf(self): + with self.test_session() as sess: + gm = mixture_same_family_lib.MixtureSameFamily( + mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]), + components_distribution=normal_lib.Normal( + loc=[-1., 1], scale=[0.1, 0.5])) + x = gm.sample(10, seed=42) + actual_log_cdf = gm.log_cdf(x) + expected_log_cdf = math_ops.reduce_logsumexp( + (gm.mixture_distribution.logits + + gm.components_distribution.log_cdf(x[..., array_ops.newaxis])), + axis=1) + actual_log_cdf_, expected_log_cdf_ = sess.run([ + actual_log_cdf, expected_log_cdf]) + self.assertAllClose(actual_log_cdf_, expected_log_cdf_, + rtol=1e-6, atol=0.0) + def testSampleConsistentMeanCovariance(self): with self.test_session() as sess: gm = mixture_same_family_lib.MixtureSameFamily( - mixture_distribution=categorical_lib.Categorical( - probs=[0.3, 0.7]), + mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]), components_distribution=mvn_diag_lib.MultivariateNormalDiag( - loc=[[-1., 1], [1, -1]], - scale_identity_multiplier=[1., 0.5])) + loc=[[-1., 1], [1, -1]], scale_identity_multiplier=[1., 0.5])) self.run_test_sample_consistent_mean_covariance(sess, gm) def testVarianceConsistentCovariance(self): with self.test_session() as sess: gm = mixture_same_family_lib.MixtureSameFamily( - mixture_distribution=categorical_lib.Categorical( - probs=[0.3, 0.7]), + mixture_distribution=categorical_lib.Categorical(probs=[0.3, 0.7]), components_distribution=mvn_diag_lib.MultivariateNormalDiag( - loc=[[-1., 1], [1, -1]], - scale_identity_multiplier=[1., 0.5])) + loc=[[-1., 1], [1, -1]], scale_identity_multiplier=[1., 0.5])) cov_, var_ = sess.run([gm.covariance(), gm.variance()]) self.assertAllClose(cov_.diagonal(), var_, atol=0.) 
+ if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py index e92bcf8c1f..5558ef0f25 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture_same_family.py +++ b/tensorflow/contrib/distributions/python/ops/mixture_same_family.py @@ -260,6 +260,14 @@ class MixtureSameFamily(distribution.Distribution): probs * self.components_distribution.mean(), axis=-1 - self._event_ndims) # [B, E] + def _log_cdf(self, x): + x = self._pad_sample_dims(x) + log_cdf_x = self.components_distribution.log_cdf(x) # [S, B, k] + log_mix_prob = nn_ops.log_softmax( + self.mixture_distribution.logits, dim=-1) # [B, k] + return math_ops.reduce_logsumexp( + log_cdf_x + log_mix_prob, axis=-1) # [S, B] + def _variance(self): with ops.control_dependencies(self._runtime_assertions): # Law of total variance: Var(Y) = E[Var(Y|X)] + Var(E[Y|X]) -- GitLab From 3b679ec63be33ccfaa99dce3d2c65bad9c36961f Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 5 Oct 2017 10:46:21 -0700 Subject: [PATCH 066/909] Add srcs_version="PY2AND3" PiperOrigin-RevId: 171173975 --- tensorflow/python/eager/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 4069ef1c70..76d4f37e9a 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -431,5 +431,6 @@ filegroup( py_library( name = "imperative_grad", srcs = ["imperative_grad.py"], + srcs_version = "PY2AND3", deps = [":tape"], ) -- GitLab From fd5326666ac5297e2bec09b29728d8731951be23 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 5 Oct 2017 10:52:58 -0700 Subject: [PATCH 067/909] Fixes markdown formatting of EstimatorSpec constructor. Before, it was rendering as italics because of the missing newline. 
PiperOrigin-RevId: 171175131 --- tensorflow/python/estimator/model_fn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py index d58e03f6ef..da202408c3 100644 --- a/tensorflow/python/estimator/model_fn.py +++ b/tensorflow/python/estimator/model_fn.py @@ -77,6 +77,7 @@ class EstimatorSpec( """Creates a validated `EstimatorSpec` instance. Depending on the value of `mode`, different arguments are required. Namely + * For `mode == ModeKeys.TRAIN`: required fields are `loss` and `train_op`. * For `mode == ModeKeys.EVAL`: required field is `loss`. * For `mode == ModeKeys.PREDICT`: required fields are `predictions`. -- GitLab From 8818469ff81e8877eb7f042df19241b5eaa31637 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 5 Oct 2017 11:35:23 -0700 Subject: [PATCH 068/909] [tf.data] Update more `tf.contrib.data` references to `tf.data`. PiperOrigin-RevId: 171182644 --- .../contrib/data/python/ops/batching.py | 10 ++++---- .../contrib/data/python/ops/enumerate_ops.py | 2 +- .../contrib/data/python/ops/error_ops.py | 4 ++-- .../contrib/data/python/ops/grouping.py | 2 +- .../contrib/data/python/ops/resampling.py | 2 +- .../contrib/data/python/ops/sloppy_ops.py | 2 +- .../api_guides/python/threading_and_queues.md | 23 ++++++++++--------- tensorflow/docs_src/programmers_guide/faq.md | 6 ++--- 8 files changed, 26 insertions(+), 25 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index 16f01557a2..ccfa8747da 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -68,7 +68,7 @@ def dense_to_sparse_batch(batch_size, row_shape): Returns: A `Dataset` transformation function, which can be passed to - @{tf.contrib.data.Dataset.apply}. + @{tf.data.Dataset.apply}. 
""" def _apply_fn(dataset): @@ -87,7 +87,7 @@ def unbatch(): Returns: A `Dataset` transformation function, which can be passed to - @{tf.contrib.data.Dataset.apply}. + @{tf.data.Dataset.apply}. """ def _apply_fn(dataset): @@ -106,7 +106,7 @@ def unbatch(): def batch_and_drop_remainder(batch_size): """A batching transformation that omits the final small batch (if present). - Like @{tf.contrib.data.Dataset.batch}, this transformation combines + Like @{tf.data.Dataset.batch}, this transformation combines consecutive elements of this dataset into batches. However, if the batch size does not evenly divide the input dataset size, this transformation will drop the final smaller element. @@ -115,7 +115,7 @@ def batch_and_drop_remainder(batch_size): transformation and `Dataset.batch()`: ```python - dataset = tf.contrib.data.Dataset.range(200) + dataset = tf.data.Dataset.range(200) batched = dataset.apply(tf.contrib.data.batch_and_drop_remainder(128)) print(batched.output_shapes) # ==> "(128,)" (the batch dimension is known) ``` @@ -130,7 +130,7 @@ def batch_and_drop_remainder(batch_size): Returns: A `Dataset` transformation function, which can be passed to - @{tf.contrib.data.Dataset.apply} + @{tf.data.Dataset.apply} """ def _apply_fn(dataset): diff --git a/tensorflow/contrib/data/python/ops/enumerate_ops.py b/tensorflow/contrib/data/python/ops/enumerate_ops.py index 40e7315f1f..ac2b386b81 100644 --- a/tensorflow/contrib/data/python/ops/enumerate_ops.py +++ b/tensorflow/contrib/data/python/ops/enumerate_ops.py @@ -47,7 +47,7 @@ def enumerate_dataset(start=0): Returns: A `Dataset` transformation function, which can be passed to - @{tf.contrib.data.Dataset.apply}. + @{tf.data.Dataset.apply}. 
""" def _apply_fn(dataset): diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py index dffa8b7f7d..238bb52b02 100644 --- a/tensorflow/contrib/data/python/ops/error_ops.py +++ b/tensorflow/contrib/data/python/ops/error_ops.py @@ -30,7 +30,7 @@ def ignore_errors(): example: ```python - dataset = tf.contrib.data.Dataset.from_tensor_slices([1., 2., 0., 4.]) + dataset = tf.data.Dataset.from_tensor_slices([1., 2., 0., 4.]) # Computing `tf.check_numerics(1. / 0.)` will raise an InvalidArgumentError. dataset = dataset.map(lambda x: tf.check_numerics(1. / x, "error")) @@ -42,7 +42,7 @@ def ignore_errors(): Returns: A `Dataset` transformation function, which can be passed to - @{tf.contrib.data.Dataset.apply}. + @{tf.data.Dataset.apply}. """ def _apply_fn(dataset): diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 2cf7e8f4ee..6df7b22fb6 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -57,7 +57,7 @@ def group_by_window(key_func, Returns: A `Dataset` transformation function, which can be passed to - @{tf.contrib.data.Dataset.apply}. + @{tf.data.Dataset.apply}. Raises: ValueError: if neither or both of {`window_size`, `window_size_func`} are diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index f4f2d42854..ee46f3e852 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -48,7 +48,7 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): Returns: A `Dataset` transformation function, which can be passed to - @{tf.contrib.data.Dataset.apply}. + @{tf.data.Dataset.apply}. 
""" def _apply_fn(dataset): diff --git a/tensorflow/contrib/data/python/ops/sloppy_ops.py b/tensorflow/contrib/data/python/ops/sloppy_ops.py index 01e234f1d0..058c497320 100644 --- a/tensorflow/contrib/data/python/ops/sloppy_ops.py +++ b/tensorflow/contrib/data/python/ops/sloppy_ops.py @@ -118,7 +118,7 @@ def sloppy_interleave(map_func, cycle_length, block_length=1): Returns: A `Dataset` transformation function, which can be passed to - @{tf.contrib.data.Dataset.apply}. + @{tf.data.Dataset.apply}. """ def _apply_fn(dataset): return SloppyInterleaveDataset( diff --git a/tensorflow/docs_src/api_guides/python/threading_and_queues.md b/tensorflow/docs_src/api_guides/python/threading_and_queues.md index 9d8a05c7dc..ab95ce0af9 100644 --- a/tensorflow/docs_src/api_guides/python/threading_and_queues.md +++ b/tensorflow/docs_src/api_guides/python/threading_and_queues.md @@ -1,13 +1,14 @@ # Threading and Queues Note: In versions of TensorFlow before 1.2, we recommended using multi-threaded, -queue-based input pipelines for performance. Beginning with TensorFlow 1.2, -however, we recommend using the `tf.contrib.data` module instead. (See -[Datasets](datasets) for details.) The `tf.contrib.data` module offers an -easier-to-use interface for constructing efficient input pipelines. Furthermore, -we've stopped developing the old multi-threaded, queue-based input pipelines. -We've retained the documentation in this file to help developers who are still -maintaining older code. +queue-based input pipelines for performance. Beginning with TensorFlow 1.4, +however, we recommend using the `tf.data` module instead. (See +[Datasets](datasets) for details. In TensorFlow 1.2 and 1.3, the module was +called `tf.contrib.data`.) The `tf.data` module offers an easier-to-use +interface for constructing efficient input pipelines. Furthermore, we've stopped +developing the old multi-threaded, queue-based input pipelines. 
We've retained +the documentation in this file to help developers who are still maintaining +older code. Multithreaded queues are a powerful and widely used mechanism supporting asynchronous computation. @@ -58,9 +59,9 @@ prepare inputs for training a model as follows: * A training thread executes a training op that dequeues mini-batches from the queue -We recommend using the @{tf.contrib.data.Dataset.shuffle$`shuffle`} -and @{tf.contrib.data.Dataset.batch$`batch`} methods of a -@{tf.contrib.data.Dataset$`Dataset`} to accomplish this. However, if you'd prefer +We recommend using the @{tf.data.Dataset.shuffle$`shuffle`} +and @{tf.data.Dataset.batch$`batch`} methods of a +@{tf.data.Dataset$`Dataset`} to accomplish this. However, if you'd prefer to use a queue-based version instead, you can find a full implementation in the @{tf.train.shuffle_batch} function. @@ -103,7 +104,7 @@ The simplest possible use of this function might be something like this: ``` python # create a dataset that counts from 0 to 99 input = tf.constant(list(range(100))) -input = tf.contrib.data.Dataset.from_tensor_slices(input) +input = tf.data.Dataset.from_tensor_slices(input) input = input.make_one_shot_iterator().get_next() # Create a slightly shuffled batch from the sorted elements diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md index 865016dc02..67ed0a9a60 100644 --- a/tensorflow/docs_src/programmers_guide/faq.md +++ b/tensorflow/docs_src/programmers_guide/faq.md @@ -269,13 +269,13 @@ See the how-to documentation for There are three main options for dealing with data in a custom format. The easiest option is to write parsing code in Python that transforms the data -into a numpy array. Then use @{tf.contrib.data.Dataset.from_tensor_slices} to +into a numpy array. Then use @{tf.data.Dataset.from_tensor_slices} to create an input pipeline from the in-memory data. 
If your data doesn't fit in memory, try doing the parsing in the Dataset pipeline. Start with an appropriate file reader, like -@{tf.contrib.data.TextLineDataset}. Then convert the dataset by mapping -@{tf.contrib.data.Dataset.map$mapping} appropriate operations over it. +@{tf.data.TextLineDataset}. Then convert the dataset by mapping +@{tf.data.Dataset.map$mapping} appropriate operations over it. Prefer predefined TensorFlow operations such as @{tf.decode_raw}, @{tf.decode_csv}, @{tf.parse_example}, or @{tf.image.decode_png}. -- GitLab From 6c875f0da3c61610063f705111b9bfa2e26ca52f Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Thu, 5 Oct 2017 11:56:28 -0700 Subject: [PATCH 069/909] Add the 'is_the_final_export' signal to Exporters. Instead of adding the option to respect `is_the_final_export` into the `Exporter` that also does garbage collection, such exporter is split into two: `LatestExporter` and `FinalExporter`. There is a concern that options `exports_to_keep` and `only_the_final_export` overlap significantly and are somewhat in conflict. What does it mean to keep last 5 exports but only export the final one? After splitting in two classes there is a lot of code duplication. The common implementation is gathered in a private base class. When the training ends, the final export is performed via `Exporter.export()` call. That final export is going to have is_the_final_export parameter being set to true. If `TrainSpec.max_steps` is `None`, then "when training ends" is undefined. We are going to train forever. In that case, `is_the_final_export` is going to be always False. I added a note about it. 
PiperOrigin-RevId: 171185881 --- tensorflow/python/estimator/estimator_lib.py | 2 + tensorflow/python/estimator/exporter.py | 134 ++++++++++++++++-- tensorflow/python/estimator/exporter_test.py | 42 +++++- tensorflow/python/estimator/training.py | 37 +++-- tensorflow/python/estimator/training_test.py | 81 +++++++++++ .../tensorflow.estimator.-exporter.pbtxt | 2 +- ...tensorflow.estimator.-final-exporter.pbtxt | 18 +++ ...ensorflow.estimator.-latest-exporter.pbtxt | 2 +- .../api/golden/tensorflow.estimator.pbtxt | 4 + 9 files changed, 293 insertions(+), 29 deletions(-) create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.-final-exporter.pbtxt diff --git a/tensorflow/python/estimator/estimator_lib.py b/tensorflow/python/estimator/estimator_lib.py index a5b3faeffb..5b82fd75ff 100644 --- a/tensorflow/python/estimator/estimator_lib.py +++ b/tensorflow/python/estimator/estimator_lib.py @@ -30,6 +30,7 @@ from tensorflow.python.estimator.canned.parsing_utils import regressor_parse_exa from tensorflow.python.estimator.estimator import Estimator from tensorflow.python.estimator.export import export_lib as export from tensorflow.python.estimator.exporter import Exporter +from tensorflow.python.estimator.exporter import FinalExporter from tensorflow.python.estimator.exporter import LatestExporter from tensorflow.python.estimator.inputs import inputs from tensorflow.python.estimator.model_fn import EstimatorSpec @@ -70,6 +71,7 @@ _allowed_symbols = [ 'TrainSpec', 'Exporter', 'LatestExporter', + 'FinalExporter', ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py index 505820dd93..56400ab935 100644 --- a/tensorflow/python/estimator/exporter.py +++ b/tensorflow/python/estimator/exporter.py @@ -40,7 +40,8 @@ class Exporter(object): pass @abc.abstractmethod - def export(self, estimator, export_path, checkpoint_path, eval_result): + def export(self, 
estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): """Exports the given `Estimator` to a specific format. Args: @@ -48,6 +49,12 @@ class Exporter(object): export_path: A string containing a directory where to write the export. checkpoint_path: The checkpoint path to export. eval_result: The output of `Estimator.evaluate` on this checkpoint. + is_the_final_export: This boolean is True when this is an export in the + end of training. It is False for the intermediate exports during + the training. + When passing `Exporter` to `tf.estimator.train_and_evaluate` + `is_the_final_export` is always False if `TrainSpec.max_steps` is + `None`. Returns: The string path to the exported directory or `None` if export is skipped. @@ -55,18 +62,18 @@ class Exporter(object): pass -class LatestExporter(Exporter): +class _SavedModelExporter(Exporter): """This class exports the serving graph and checkpoints. - In addition, the class also garbage collects stale exports. + This class provides a basic exporting functionality and serves as a + foundation for specialized `Exporter`s. """ def __init__(self, name, serving_input_fn, assets_extra=None, - as_text=False, - exports_to_keep=5): + as_text=False): """Create an `Exporter` to use with `tf.estimator.EvalSpec`. Args: @@ -83,9 +90,6 @@ class LatestExporter(Exporter): `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. as_text: whether to write the SavedModel proto in text format. Defaults to `False`. - exports_to_keep: Number of exports to keep. Older exports will be - garbage-collected. Defaults to 5. Set to `None` to disable garbage - collection. Raises: ValueError: if any arguments is invalid. 
@@ -94,16 +98,15 @@ class LatestExporter(Exporter): self._serving_input_fn = serving_input_fn self._assets_extra = assets_extra self._as_text = as_text - self._exports_to_keep = exports_to_keep - if exports_to_keep is not None and exports_to_keep <= 0: - raise ValueError( - '`exports_to_keep`, if provided, must be positive number') @property def name(self): return self._name - def export(self, estimator, export_path, checkpoint_path, eval_result): + def export(self, estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): + del is_the_final_export + export_result = estimator.export_savedmodel( export_path, self._serving_input_fn, @@ -111,6 +114,111 @@ class LatestExporter(Exporter): as_text=self._as_text, checkpoint_path=checkpoint_path) + return export_result + + +class FinalExporter(Exporter): + """This class exports the serving graph and checkpoints in the end. + + This class performs a single export in the end of training. + """ + + def __init__(self, + name, + serving_input_fn, + assets_extra=None, + as_text=False): + """Create an `Exporter` to use with `tf.estimator.EvalSpec`. + + Args: + name: unique name of this `Exporter` that is going to be used in the + export path. + serving_input_fn: a function that takes no arguments and returns an + `ServingInputReceiver`. + assets_extra: An optional dict specifying how to populate the assets.extra + directory within the exported SavedModel. Each key should give the + destination path (including the filename) relative to the assets.extra + directory. The corresponding value gives the full path of the source + file to be copied. For example, the simple case of copying a single + file without renaming it is specified as + `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. + as_text: whether to write the SavedModel proto in text format. Defaults to + `False`. + + Raises: + ValueError: if any arguments is invalid. 
+ """ + self._saved_model_exporter = _SavedModelExporter(name, serving_input_fn, + assets_extra, as_text) + + @property + def name(self): + return self._saved_model_exporter.name + + def export(self, estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): + if not is_the_final_export: + return None + + tf_logging.info('Performing the final export in the end of training.') + + return self._saved_model_exporter.export(estimator, export_path, + checkpoint_path, eval_result, + is_the_final_export) + + +class LatestExporter(Exporter): + """This class regularly exports the serving graph and checkpoints. + + In addition to exporting, this class also garbage collects stale exports. + """ + + def __init__(self, + name, + serving_input_fn, + assets_extra=None, + as_text=False, + exports_to_keep=5): + """Create an `Exporter` to use with `tf.estimator.EvalSpec`. + + Args: + name: unique name of this `Exporter` that is going to be used in the + export path. + serving_input_fn: a function that takes no arguments and returns an + `ServingInputReceiver`. + assets_extra: An optional dict specifying how to populate the assets.extra + directory within the exported SavedModel. Each key should give the + destination path (including the filename) relative to the assets.extra + directory. The corresponding value gives the full path of the source + file to be copied. For example, the simple case of copying a single + file without renaming it is specified as + `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. + as_text: whether to write the SavedModel proto in text format. Defaults to + `False`. + exports_to_keep: Number of exports to keep. Older exports will be + garbage-collected. Defaults to 5. Set to `None` to disable garbage + collection. + + Raises: + ValueError: if any arguments is invalid. 
+ """ + self._saved_model_exporter = _SavedModelExporter(name, serving_input_fn, + assets_extra, as_text) + self._exports_to_keep = exports_to_keep + if exports_to_keep is not None and exports_to_keep <= 0: + raise ValueError( + '`exports_to_keep`, if provided, must be positive number') + + @property + def name(self): + return self._saved_model_exporter.name + + def export(self, estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): + export_result = self._saved_model_exporter.export( + estimator, export_path, checkpoint_path, eval_result, + is_the_final_export) + self._garbage_collect_exports(export_path) return export_result diff --git a/tensorflow/python/estimator/exporter_test.py b/tensorflow/python/estimator/exporter_test.py index 2ceff1bfd6..f90c35dce7 100644 --- a/tensorflow/python/estimator/exporter_test.py +++ b/tensorflow/python/estimator/exporter_test.py @@ -37,12 +37,13 @@ class LatestExporterTest(test.TestCase): pass with self.assertRaisesRegexp(ValueError, "positive number"): - exporter_lib.LatestExporter( + exporter = exporter_lib.LatestExporter( name="latest_exporter", serving_input_fn=_serving_input_fn, exports_to_keep=0) + self.assertEqual("latest_exporter", exporter.name) - def test_saved_model_exporter(self): + def test_latest_exporter(self): def _serving_input_fn(): pass @@ -60,7 +61,40 @@ class LatestExporterTest(test.TestCase): estimator.export_savedmodel.return_value = "export_result_path" export_result = exporter.export(estimator, export_dir_base, - "checkpoint_path", {}) + "checkpoint_path", {}, False) + + self.assertEqual("export_result_path", export_result) + estimator.export_savedmodel.assert_called_with( + export_dir_base, + _serving_input_fn, + assets_extra={"from/path": "to/path"}, + as_text=False, + checkpoint_path="checkpoint_path") + + def test_only_the_last_export_is_saved(self): + + def _serving_input_fn(): + pass + + export_dir_base = tempfile.mkdtemp() + "export/" + gfile.MkDir(export_dir_base) + + 
exporter = exporter_lib.FinalExporter( + name="latest_exporter", + serving_input_fn=_serving_input_fn, + assets_extra={"from/path": "to/path"}, + as_text=False) + estimator = test.mock.Mock(spec=estimator_lib.Estimator) + estimator.export_savedmodel.return_value = "export_result_path" + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {}, False) + + self.assertFalse(estimator.export_savedmodel.called) + self.assertEqual(None, export_result) + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {}, True) self.assertEqual("export_result_path", export_result) estimator.export_savedmodel.assert_called_with( @@ -93,7 +127,7 @@ class LatestExporterTest(test.TestCase): estimator = test.mock.Mock(spec=estimator_lib.Estimator) # Garbage collect all but the most recent 2 exports, # where recency is determined based on the timestamp directory names. - exporter.export(estimator, export_dir_base, None, None) + exporter.export(estimator, export_dir_base, None, None, False) self.assertFalse(gfile.Exists(export_dir_1)) self.assertFalse(gfile.Exists(export_dir_2)) diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 17c072566a..5c0ebbea35 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -519,8 +519,11 @@ class _TrainingExecutor(object): class NewCheckpointListener( basic_session_run_hooks.CheckpointSaverListener): - def __init__(self, estimator, eval_spec): - self._evaluator = _TrainingExecutor._Evaluator(estimator, eval_spec) # pylint: disable=protected-access + def __init__(self, estimator, eval_spec, max_training_steps): + # pylint: disable=protected-access + self._evaluator = _TrainingExecutor._Evaluator(estimator, eval_spec, + max_training_steps) + # pylint: enable=protected-access def after_save(self, session, global_step_value): del session, global_step_value @@ -528,8 +531,10 @@ class _TrainingExecutor(object): 
# When the underlying `Estimator` object saves a new checkpoint, we would # like this callback to be called so that evaluation and export can trigger. - saving_listeners = [NewCheckpointListener(self._estimator, self._eval_spec)] - + saving_listeners = [ + NewCheckpointListener(self._estimator, self._eval_spec, + self._train_spec.max_steps) + ] return self._start_distributed_training(saving_listeners=saving_listeners) def run_evaluator(self): @@ -566,7 +571,8 @@ class _TrainingExecutor(object): 'after {} secs (eval_spec.throttle_secs) or training is ' 'finished.'.format(self._eval_spec.throttle_secs)) - evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec) + evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec, + self._train_spec.max_steps) while True: self._estimator.train( @@ -636,7 +642,8 @@ class _TrainingExecutor(object): time.sleep(start_delay_secs) latest_eval_result = None - evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec) + evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec, + self._train_spec.max_steps) while True: if latest_eval_result: @@ -663,11 +670,12 @@ class _TrainingExecutor(object): class _Evaluator(object): """A helper class to call `Estimator.evaluate` and export model.""" - def __init__(self, estimator, eval_spec): + def __init__(self, estimator, eval_spec, max_training_steps): self._estimator = estimator self._eval_spec = eval_spec self._previous_ckpt_path = None self._last_warning_time = 0 + self._max_training_steps = max_training_steps def evaluate_and_export(self): """Evaluate and (maybe) export the current model. @@ -712,7 +720,14 @@ class _TrainingExecutor(object): 'Internal error: `Estimator.evaluate` result should have ' '`global_step` in result. Given {}'.format(eval_result)) - self._export_eval_result(eval_result, latest_ckpt_path) + # TODO(isaprykin): There is a potential race condition here in the + # distributed setting. 
The worker job that performs training + # might stop at a later global step value than the evaluator job. + is_the_final_export = (eval_result[ops.GraphKeys.GLOBAL_STEP] >= + self._max_training_steps + if self._max_training_steps else False) + self._export_eval_result(eval_result, latest_ckpt_path, + is_the_final_export) self._last_warning_time = 0 self._previous_ckpt_path = latest_ckpt_path @@ -725,7 +740,8 @@ class _TrainingExecutor(object): logging.warning(message) self._last_warning_time = current_time - def _export_eval_result(self, eval_result, checkpoint_path): + def _export_eval_result(self, eval_result, checkpoint_path, + is_the_final_export): """Export `eval_result` according to exporters in `EvalSpec`.""" export_dir_base = os.path.join( compat.as_str_any(self._estimator.model_dir), @@ -738,4 +754,5 @@ class _TrainingExecutor(object): compat.as_str_any(export_dir_base), compat.as_str_any(exporter.name)), checkpoint_path=checkpoint_path, - eval_result=eval_result) + eval_result=eval_result, + is_the_final_export=is_the_final_export) diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py index 51aed757a2..40972ab5a0 100644 --- a/tensorflow/python/estimator/training_test.py +++ b/tensorflow/python/estimator/training_test.py @@ -815,6 +815,46 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase): self.assertEqual(2, mock_est.evaluate.call_count) self.assertEqual(2, exporter.export.call_count) + def test_final_export_is_true_in_the_end(self): + training_max_step = 200 + + mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_est.model_dir = compat.as_bytes(test.get_temp_dir()) + mock_est.evaluate.side_effect = [ + {_GLOBAL_STEP_KEY: training_max_step // 2}, + {_GLOBAL_STEP_KEY: training_max_step} + ] + mock_est.latest_checkpoint.side_effect = ['path_1', 'path_2'] + + mock_train_spec = test.mock.Mock(spec=training.TrainSpec) + mock_train_spec.max_steps = training_max_step + +
mock_est.times_export_fn_was_called = 0 + mock_est.times_the_final_export_was_true = 0 + def export(estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): + del export_path, checkpoint_path, eval_result + estimator.times_export_fn_was_called += 1 + if is_the_final_export: + estimator.times_the_final_export_was_true += 1 + + exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) + exporter.name = 'see_how_many_times_export_is_called' + exporter.export = export + + eval_spec = training.EvalSpec( + input_fn=lambda: 1, + start_delay_secs=0, + throttle_secs=0, + exporters=exporter) + + executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec) + executor.run_evaluator() + + self.assertEqual(2, mock_est.evaluate.call_count) + self.assertEqual(2, mock_est.times_export_fn_was_called) + self.assertEqual(1, mock_est.times_the_final_export_was_true) + def test_skip_evaluation_due_to_ckpt(self): training_max_step = 200 mock_est = test.mock.Mock(spec=estimator_lib.Estimator) @@ -1147,6 +1187,47 @@ class TrainingExecutorRunLocalTest(test.TestCase): with self.assertRaisesRegexp(RuntimeError, _STALE_CHECKPOINT_MSG): executor.run_local() + def test_final_export_is_true_in_the_end(self): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') + mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn + + mock_est.times_export_fn_was_called = 0 + mock_est.times_the_final_export_was_true = 0 + def export(estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): + del export_path, checkpoint_path, eval_result + estimator.times_export_fn_was_called += 1 + if is_the_final_export: + estimator.times_the_final_export_was_true += 1 + + exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) + exporter.name = 'see_how_many_times_export_is_called' + exporter.export = export + + train_spec = training.TrainSpec( + input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()]) + eval_spec = 
training.EvalSpec( + input_fn=lambda: 1, + hooks=[_FakeHook()], + throttle_secs=100, + exporters=exporter) + # should be called 3 times. + mock_est.evaluate.side_effect = [{ + _GLOBAL_STEP_KEY: train_spec.max_steps - 100 + }, { + _GLOBAL_STEP_KEY: train_spec.max_steps - 50 + }, { + _GLOBAL_STEP_KEY: train_spec.max_steps + }] + + executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) + executor.run_local() + + self.assertEqual(3, mock_est.train.call_count) + self.assertEqual(3, mock_est.evaluate.call_count) + self.assertEqual(3, mock_est.times_export_fn_was_called) + self.assertEqual(1, mock_est.times_the_final_export_was_true) + def test_train_and_evaluate_args(self): mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') mock_est.latest_checkpoint.return_value = 'checkpoint_path/' diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-exporter.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-exporter.pbtxt index c69e4c7a30..035af70e52 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-exporter.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-exporter.pbtxt @@ -11,6 +11,6 @@ tf_class { } member_method { name: "export" - argspec: "args=[\'self\', \'estimator\', \'export_path\', \'checkpoint_path\', \'eval_result\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'estimator\', \'export_path\', \'checkpoint_path\', \'eval_result\', \'is_the_final_export\'], varargs=None, keywords=None, defaults=None" } } diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-final-exporter.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-final-exporter.pbtxt new file mode 100644 index 0000000000..4c2dbc4d37 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-final-exporter.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.estimator.FinalExporter" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "name" + mtype: "" + } + 
member_method { + name: "__init__" + argspec: "args=[\'self\', \'name\', \'serving_input_fn\', \'assets_extra\', \'as_text\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " + } + member_method { + name: "export" + argspec: "args=[\'self\', \'estimator\', \'export_path\', \'checkpoint_path\', \'eval_result\', \'is_the_final_export\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-latest-exporter.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-latest-exporter.pbtxt index c3f98f84b8..ae1483bf3f 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-latest-exporter.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-latest-exporter.pbtxt @@ -13,6 +13,6 @@ tf_class { } member_method { name: "export" - argspec: "args=[\'self\', \'estimator\', \'export_path\', \'checkpoint_path\', \'eval_result\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'estimator\', \'export_path\', \'checkpoint_path\', \'eval_result\', \'is_the_final_export\'], varargs=None, keywords=None, defaults=None" } } diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt index 25e94a14a6..ef93a61bd8 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.pbtxt @@ -32,6 +32,10 @@ tf_module { name: "Exporter" mtype: "" } + member { + name: "FinalExporter" + mtype: "" + } member { name: "LatestExporter" mtype: "" -- GitLab From 9f00851a636e77223d4445a5ffa1fe1bf506f54e Mon Sep 17 00:00:00 2001 From: Jonathan Shen Date: Thu, 5 Oct 2017 12:09:44 -0700 Subject: [PATCH 070/909] Register GPU bool Fill op. 
PiperOrigin-RevId: 171187907 --- tensorflow/core/kernels/constant_op.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index 0cc2ea0109..618d4f580b 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -247,6 +247,7 @@ REGISTER_KERNEL(GPU, int8); REGISTER_KERNEL(GPU, uint16); REGISTER_KERNEL(GPU, int16); REGISTER_KERNEL(GPU, int64); +REGISTER_KERNEL(GPU, bool); // Currently we do not support filling strings and complex64 on GPU // A special GPU kernel for int32. -- GitLab From 4bf27f8d4acee2cb8df27427668bddc92137e2ef Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 5 Oct 2017 12:22:32 -0700 Subject: [PATCH 071/909] eager: Release Python GIL when executing kernels. As a side effect, this enables use of py_func. PiperOrigin-RevId: 171189922 --- tensorflow/contrib/eager/python/datasets_test.py | 12 ++++++++++++ tensorflow/python/eager/pywrap_tfe_src.cc | 2 ++ 2 files changed, 14 insertions(+) diff --git a/tensorflow/contrib/eager/python/datasets_test.py b/tensorflow/contrib/eager/python/datasets_test.py index a2da6b28c6..076c92e73f 100644 --- a/tensorflow/contrib/eager/python/datasets_test.py +++ b/tensorflow/contrib/eager/python/datasets_test.py @@ -19,7 +19,9 @@ from __future__ import print_function from tensorflow.contrib.data import Dataset from tensorflow.contrib.eager.python import datasets from tensorflow.python.eager import test +from tensorflow.python.framework import dtypes from tensorflow.python.ops import math_ops +from tensorflow.python.ops import script_ops class IteratorTest(test.TestCase): @@ -69,6 +71,16 @@ class IteratorTest(test.TestCase): got2 = [x.numpy() for x in datasets.Iterator(ds)] self.assertAllEqual(got1, got2) + def testPyFunc(self): + + def my_map(inp): + return [[x + 1 for x in inp]] + + ds = Dataset.range(4).map( + lambda x: script_ops.py_func(my_map, [[x]], dtypes.int64)) + got = [x.numpy() for x in 
datasets.Iterator(ds)] + self.assertAllEqual([[1], [2], [3], [4]], got) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index a2079d009f..3d64c875ec 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -342,6 +342,7 @@ void TFE_Py_Execute(TFE_Context* ctx, const char* device_name, if (TF_GetCode(out_status) == TF_OK) { SetOpAttrs(ctx, op, attrs, out_status); } + Py_BEGIN_ALLOW_THREADS; if (TF_GetCode(out_status) == TF_OK) { int num_outputs = outputs->size(); TFE_Execute(op, outputs->data(), &num_outputs, out_status); @@ -354,6 +355,7 @@ void TFE_Py_Execute(TFE_Context* ctx, const char* device_name, .c_str()); } TFE_DeleteOp(op); + Py_END_ALLOW_THREADS; } PyObject* TFE_Py_RegisterExceptionClass(PyObject* e) { -- GitLab From b31c03565e18fef7ab4539032dd5c69a94487a05 Mon Sep 17 00:00:00 2001 From: Mustafa Ispir Date: Thu, 5 Oct 2017 12:55:19 -0700 Subject: [PATCH 072/909] Move profiler hook from contrib to core. 
PiperOrigin-RevId: 171194291 --- tensorflow/contrib/hooks/BUILD | 20 --- .../hooks/python/training/profiler_hook.py | 87 +------------ .../python/training/profiler_hook_test.py | 122 ------------------ tensorflow/python/BUILD | 1 + .../training/basic_session_run_hooks.py | 82 +++++++++++- .../training/basic_session_run_hooks_test.py | 93 +++++++++++++ tensorflow/python/training/training.py | 2 + .../tensorflow.train.-profiler-hook.pbtxt | 30 +++++ .../tools/api/golden/tensorflow.train.pbtxt | 4 + 9 files changed, 214 insertions(+), 227 deletions(-) delete mode 100644 tensorflow/contrib/hooks/python/training/profiler_hook_test.py create mode 100644 tensorflow/tools/api/golden/tensorflow.train.-profiler-hook.pbtxt diff --git a/tensorflow/contrib/hooks/BUILD b/tensorflow/contrib/hooks/BUILD index d81e868d4a..1576c9ec9b 100644 --- a/tensorflow/contrib/hooks/BUILD +++ b/tensorflow/contrib/hooks/BUILD @@ -19,26 +19,6 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - "//tensorflow/core:protos_all_py", - "//tensorflow/python:client", - "//tensorflow/python:platform", - "//tensorflow/python:training", - "//tensorflow/python:util", - ], -) - -py_test( - name = "profiler_hook_test", - size = "small", - srcs = ["python/training/profiler_hook_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":hooks", - "//tensorflow/contrib/framework:framework_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:platform", - "//tensorflow/python:state_ops", "//tensorflow/python:training", ], ) diff --git a/tensorflow/contrib/hooks/python/training/profiler_hook.py b/tensorflow/contrib/hooks/python/training/profiler_hook.py index 35aa25edfd..6173aa0797 100644 --- a/tensorflow/contrib/hooks/python/training/profiler_hook.py +++ b/tensorflow/contrib/hooks/python/training/profiler_hook.py @@ -12,93 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Additional `SessionRunHook` implementations to complement those in -tensorflow/python/training. - -""" +"""Placeholder of ProfilerHook for backward compatibility.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os.path - -from tensorflow.core.protobuf import config_pb2 -from tensorflow.python.client import timeline -from tensorflow.python.platform import gfile -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training.basic_session_run_hooks import SecondOrStepTimer -from tensorflow.python.training.session_run_hook import SessionRunArgs -from tensorflow.python.training import session_run_hook -from tensorflow.python.training import training_util - - -class ProfilerHook(session_run_hook.SessionRunHook): - """Captures CPU/GPU profiling information every N steps or seconds. - - This produces files called "timeline-.json", which are in Chrome - Trace format. - - For more information see: - https://github.com/catapult-project/catapult/blob/master/tracing/README.md""" - - def __init__(self, - save_steps=None, - save_secs=None, - output_dir="", - show_dataflow=True, - show_memory=False): - """Initializes a hook that takes periodic profiling snapshots. - - Args: - save_steps: `int`, save profile traces every N steps. Exactly one of - `save_secs` and `save_steps` should be set. - save_secs: `int`, save profile traces every N seconds. - output_dir: `string`, the directory to save the profile traces to. - Defaults to the current directory. - show_dataflow: `bool`, if True, add flow events to the trace connecting - producers and consumers of tensors. - show_memory: `bool`, if True, add object snapshot events to the trace - showing the sizes and lifetimes of tensors. 
- """ - self._output_file = os.path.join(output_dir, "timeline-{}.json") - self._show_dataflow = show_dataflow - self._show_memory = show_memory - self._timer = SecondOrStepTimer(every_secs=save_secs, - every_steps=save_steps) - - def begin(self): - self._next_step = None - self._global_step_tensor = training_util.get_global_step() - if self._global_step_tensor is None: - raise RuntimeError( - "Global step should be created to use ProfilerHook.") - - def before_run(self, run_context): - self._request_summary = ( - self._next_step is None or - self._timer.should_trigger_for_step(self._next_step)) - requests = {"global_step": self._global_step_tensor} - opts = (config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE) - if self._request_summary else None) - - return SessionRunArgs(requests, options=opts) - - def after_run(self, run_context, run_values): - global_step = run_values.results["global_step"] - - if self._request_summary: - self._timer.update_last_triggered_step(global_step) - self._save(global_step, - self._output_file.format(global_step), - run_values.run_metadata.step_stats) - - self._next_step = global_step + 1 +from tensorflow.python.training import basic_session_run_hooks - def _save(self, step, save_path, step_stats): - logging.info("Saving timeline for %d into '%s'.", step, save_path) - with gfile.Open(save_path, "w") as f: - trace = timeline.Timeline(step_stats) - f.write(trace.generate_chrome_trace_format( - show_dataflow=self._show_dataflow, - show_memory=self._show_memory)) +ProfilerHook = basic_session_run_hooks.ProfilerHook # pylint: disable=invalid-name diff --git a/tensorflow/contrib/hooks/python/training/profiler_hook_test.py b/tensorflow/contrib/hooks/python/training/profiler_hook_test.py deleted file mode 100644 index e7ecb5eb2f..0000000000 --- a/tensorflow/contrib/hooks/python/training/profiler_hook_test.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for profiler_hook.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os.path -import shutil -import tempfile - -from tensorflow.contrib.framework.python.ops import variables -from tensorflow.contrib.hooks.python.training import ProfilerHook -from tensorflow.python.framework import ops -from tensorflow.python.ops import state_ops -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow.python.training import monitored_session - - -class ProfilerHookTest(test.TestCase): - - def setUp(self): - super(ProfilerHookTest, self).setUp() - self.output_dir = tempfile.mkdtemp() - self.graph = ops.Graph() - self.filepattern = os.path.join(self.output_dir, "timeline-*.json") - with self.graph.as_default(): - self.global_step = variables.get_or_create_global_step() - self.train_op = state_ops.assign_add(self.global_step, 1) - - def tearDown(self): - super(ProfilerHookTest, self).tearDown() - shutil.rmtree(self.output_dir, ignore_errors=True) - - def _count_timeline_files(self): - return len(gfile.Glob(self.filepattern)) - - def test_raise_in_both_secs_and_steps(self): - with self.assertRaises(ValueError): - ProfilerHook(save_secs=10, save_steps=20) - - def test_raise_in_none_secs_and_steps(self): - with 
self.assertRaises(ValueError): - ProfilerHook(save_secs=None, save_steps=None) - - def test_save_secs_saves_in_first_step(self): - with self.graph.as_default(): - hook = ProfilerHook(save_secs=2, output_dir=self.output_dir) - with monitored_session.SingularMonitoredSession(hooks=[hook]) as sess: - sess.run(self.train_op) - self.assertEqual(1, self._count_timeline_files()) - - @test.mock.patch('time.time') - def test_save_secs_saves_periodically(self, mock_time): - # Pick a fixed start time. - current_time = 1484863632.320497 - - with self.graph.as_default(): - mock_time.return_value = current_time - hook = ProfilerHook(save_secs=2, output_dir=self.output_dir) - with monitored_session.SingularMonitoredSession(hooks=[hook]) as sess: - sess.run(self.train_op) # Saved. - self.assertEqual(1, self._count_timeline_files()) - sess.run(self.train_op) # Not saved. - self.assertEqual(1, self._count_timeline_files()) - # Simulate 2.5 seconds of sleep. - mock_time.return_value = current_time + 2.5 - sess.run(self.train_op) # Saved. - - # Pretend some small amount of time has passed. - mock_time.return_value = current_time + 0.1 - sess.run(self.train_op) # Not saved. - # Edge test just before we should save the timeline. - mock_time.return_value = current_time + 1.9 - sess.run(self.train_op) # Not saved. - self.assertEqual(2, self._count_timeline_files()) - - mock_time.return_value = current_time + 4.5 - sess.run(self.train_op) # Saved. - self.assertEqual(3, self._count_timeline_files()) - - def test_save_steps_saves_in_first_step(self): - with self.graph.as_default(): - hook = ProfilerHook(save_secs=2, output_dir=self.output_dir) - with monitored_session.SingularMonitoredSession(hooks=[hook]) as sess: - sess.run(self.train_op) # Saved. - sess.run(self.train_op) # Not saved. 
- self.assertEqual(1, self._count_timeline_files()) - - def test_save_steps_saves_periodically(self): - with self.graph.as_default(): - hook = ProfilerHook(save_steps=2, output_dir=self.output_dir) - with monitored_session.SingularMonitoredSession(hooks=[hook]) as sess: - self.assertEqual(0, self._count_timeline_files()) - sess.run(self.train_op) # Saved. - self.assertEqual(1, self._count_timeline_files()) - sess.run(self.train_op) # Not saved. - self.assertEqual(1, self._count_timeline_files()) - sess.run(self.train_op) # Saved. - self.assertEqual(2, self._count_timeline_files()) - sess.run(self.train_op) # Not saved. - self.assertEqual(2, self._count_timeline_files()) - sess.run(self.train_op) # Saved. - self.assertEqual(3, self._count_timeline_files()) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 407ff079c1..ab3b851ef8 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3626,6 +3626,7 @@ py_test( ":variables", "//tensorflow/contrib/framework:framework_py", "//tensorflow/contrib/testing:testing_py", + "//tensorflow/core:protos_all_py", ], ) diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py index 99f057e837..1fb00343ef 100644 --- a/tensorflow/python/training/basic_session_run_hooks.py +++ b/tensorflow/python/training/basic_session_run_hooks.py @@ -22,7 +22,7 @@ @@NanTensorHook @@SummarySaverHook @@GlobalStepWaiterHook - +@@ProfilerHook """ from __future__ import absolute_import @@ -36,9 +36,12 @@ import numpy as np import six from tensorflow.core.framework.summary_pb2 import Summary +from tensorflow.core.protobuf import config_pb2 from tensorflow.core.util.event_pb2 import SessionLog +from tensorflow.python.client import timeline from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops +from tensorflow.python.platform import gfile from tensorflow.python.platform 
import tf_logging as logging from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util @@ -778,6 +781,83 @@ class FeedFnHook(session_run_hook.SessionRunHook): fetches=None, feed_dict=self.feed_fn()) +class ProfilerHook(session_run_hook.SessionRunHook): + """Captures CPU/GPU profiling information every N steps or seconds. + + This produces files called "timeline-.json", which are in Chrome + Trace format. + + For more information see: + https://github.com/catapult-project/catapult/blob/master/tracing/README.md + """ + + def __init__(self, + save_steps=None, + save_secs=None, + output_dir="", + show_dataflow=True, + show_memory=False): + """Initializes a hook that takes periodic profiling snapshots. + + `options.run_metadata` argument of `tf.Session.Run` is used to collect + metadata about execution. This hook sets the metadata and dumps it in Chrome + Trace format. + + + Args: + save_steps: `int`, save profile traces every N steps. Exactly one of + `save_secs` and `save_steps` should be set. + save_secs: `int` or `float`, save profile traces every N seconds. + output_dir: `string`, the directory to save the profile traces to. + Defaults to the current directory. + show_dataflow: `bool`, if True, add flow events to the trace connecting + producers and consumers of tensors. + show_memory: `bool`, if True, add object snapshot events to the trace + showing the sizes and lifetimes of tensors. 
+ """ + self._output_file = os.path.join(output_dir, "timeline-{}.json") + self._show_dataflow = show_dataflow + self._show_memory = show_memory + self._timer = SecondOrStepTimer( + every_secs=save_secs, every_steps=save_steps) + + def begin(self): + self._next_step = None + self._global_step_tensor = training_util._get_or_create_global_step_read() # pylint: disable=protected-access + if self._global_step_tensor is None: + raise RuntimeError("Global step should be created to use ProfilerHook.") + + def before_run(self, run_context): + self._request_summary = ( + self._next_step is None or + self._timer.should_trigger_for_step(self._next_step)) + requests = {"global_step": self._global_step_tensor} + opts = (config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE) + if self._request_summary else None) + + return SessionRunArgs(requests, options=opts) + + def after_run(self, run_context, run_values): + stale_global_step = run_values.results["global_step"] + global_step = stale_global_step + 1 + if self._request_summary: + global_step = run_context.session.run(self._global_step_tensor) + self._timer.update_last_triggered_step(global_step) + self._save(global_step, + self._output_file.format(global_step), + run_values.run_metadata.step_stats) + + self._next_step = global_step + 1 + + def _save(self, step, save_path, step_stats): + logging.info("Saving timeline for %d into '%s'.", step, save_path) + with gfile.Open(save_path, "w") as f: + trace = timeline.Timeline(step_stats) + f.write( + trace.generate_chrome_trace_format( + show_dataflow=self._show_dataflow, show_memory=self._show_memory)) + + def _as_graph_element(obj): """Retrieves Graph element.""" graph = ops.get_default_graph() diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py index 96c13edd4c..e7ff7e1221 100644 --- a/tensorflow/python/training/basic_session_run_hooks_test.py +++ 
b/tensorflow/python/training/basic_session_run_hooks_test.py @@ -19,6 +19,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os.path import shutil import tempfile import threading @@ -38,6 +39,7 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib import tensorflow.python.ops.nn_grad # pylint: disable=unused-import +from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging from tensorflow.python.summary import summary as summary_lib @@ -1161,5 +1163,96 @@ class FeedFnHookTest(test.TestCase): self.assertEqual(mon_sess.run(y), 2) +class ProfilerHookTest(test.TestCase): + + def setUp(self): + super(ProfilerHookTest, self).setUp() + self.output_dir = tempfile.mkdtemp() + self.graph = ops.Graph() + self.filepattern = os.path.join(self.output_dir, 'timeline-*.json') + with self.graph.as_default(): + self.global_step = variables.get_or_create_global_step() + self.train_op = state_ops.assign_add(self.global_step, 1) + + def tearDown(self): + super(ProfilerHookTest, self).tearDown() + shutil.rmtree(self.output_dir, ignore_errors=True) + + def _count_timeline_files(self): + return len(gfile.Glob(self.filepattern)) + + def test_raise_in_both_secs_and_steps(self): + with self.assertRaises(ValueError): + basic_session_run_hooks.ProfilerHook(save_secs=10, save_steps=20) + + def test_raise_in_none_secs_and_steps(self): + with self.assertRaises(ValueError): + basic_session_run_hooks.ProfilerHook(save_secs=None, save_steps=None) + + def test_save_secs_saves_in_first_step(self): + with self.graph.as_default(): + hook = basic_session_run_hooks.ProfilerHook( + save_secs=2, output_dir=self.output_dir) + with monitored_session.SingularMonitoredSession(hooks=[hook]) as sess: + sess.run(self.train_op) + self.assertEqual(1, 
self._count_timeline_files()) + + @test.mock.patch.object(time, 'time') + def test_save_secs_saves_periodically(self, mock_time): + # Pick a fixed start time. + current_time = 1484863632.320497 + + with self.graph.as_default(): + mock_time.return_value = current_time + hook = basic_session_run_hooks.ProfilerHook( + save_secs=2, output_dir=self.output_dir) + with monitored_session.SingularMonitoredSession(hooks=[hook]) as sess: + sess.run(self.train_op) # Saved. + self.assertEqual(1, self._count_timeline_files()) + sess.run(self.train_op) # Not saved. + self.assertEqual(1, self._count_timeline_files()) + # Simulate 2.5 seconds of sleep. + mock_time.return_value = current_time + 2.5 + sess.run(self.train_op) # Saved. + + # Pretend some small amount of time has passed. + mock_time.return_value = current_time + 0.1 + sess.run(self.train_op) # Not saved. + # Edge test just before we should save the timeline. + mock_time.return_value = current_time + 1.9 + sess.run(self.train_op) # Not saved. + self.assertEqual(2, self._count_timeline_files()) + + mock_time.return_value = current_time + 4.5 + sess.run(self.train_op) # Saved. + self.assertEqual(3, self._count_timeline_files()) + + def test_save_steps_saves_in_first_step(self): + with self.graph.as_default(): + hook = basic_session_run_hooks.ProfilerHook( + save_secs=2, output_dir=self.output_dir) + with monitored_session.SingularMonitoredSession(hooks=[hook]) as sess: + sess.run(self.train_op) # Saved. + sess.run(self.train_op) # Not saved. + self.assertEqual(1, self._count_timeline_files()) + + def test_save_steps_saves_periodically(self): + with self.graph.as_default(): + hook = basic_session_run_hooks.ProfilerHook( + save_steps=2, output_dir=self.output_dir) + with monitored_session.SingularMonitoredSession(hooks=[hook]) as sess: + self.assertEqual(0, self._count_timeline_files()) + sess.run(self.train_op) # Saved. + self.assertEqual(1, self._count_timeline_files()) + sess.run(self.train_op) # Not saved. 
+ self.assertEqual(1, self._count_timeline_files()) + sess.run(self.train_op) # Saved. + self.assertEqual(2, self._count_timeline_files()) + sess.run(self.train_op) # Not saved. + self.assertEqual(2, self._count_timeline_files()) + sess.run(self.train_op) # Saved. + self.assertEqual(3, self._count_timeline_files()) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py index e2a7b28e2b..741dddc991 100644 --- a/tensorflow/python/training/training.py +++ b/tensorflow/python/training/training.py @@ -77,6 +77,7 @@ See the @{$python/train} guide. @@GlobalStepWaiterHook @@FinalOpsHook @@FeedFnHook +@@ProfilerHook @@SecondOrStepTimer @@global_step @@basic_train_loop @@ -145,6 +146,7 @@ from tensorflow.python.training.basic_session_run_hooks import SummarySaverHook from tensorflow.python.training.basic_session_run_hooks import GlobalStepWaiterHook from tensorflow.python.training.basic_session_run_hooks import FinalOpsHook from tensorflow.python.training.basic_session_run_hooks import FeedFnHook +from tensorflow.python.training.basic_session_run_hooks import ProfilerHook from tensorflow.python.training.basic_loops import basic_train_loop from tensorflow.python.training.checkpoint_utils import init_from_checkpoint from tensorflow.python.training.checkpoint_utils import list_variables diff --git a/tensorflow/tools/api/golden/tensorflow.train.-profiler-hook.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-profiler-hook.pbtxt new file mode 100644 index 0000000000..4df6c4156a --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.train.-profiler-hook.pbtxt @@ -0,0 +1,30 @@ +path: "tensorflow.train.ProfilerHook" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'save_steps\', \'save_secs\', \'output_dir\', \'show_dataflow\', \'show_memory\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'\', 
\'True\', \'False\'], " + } + member_method { + name: "after_create_session" + argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "after_run" + argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "before_run" + argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "begin" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "end" + argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.pbtxt index 835d3f835d..edc29e62dd 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.pbtxt @@ -128,6 +128,10 @@ tf_module { name: "Optimizer" mtype: "" } + member { + name: "ProfilerHook" + mtype: "" + } member { name: "ProximalAdagradOptimizer" mtype: "" -- GitLab From a429d07bf545b5fd25c44f95fd50e012440bf99b Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Thu, 5 Oct 2017 12:58:48 -0700 Subject: [PATCH 073/909] Move Head to the new summary API. This may change the names of summaries produced, but will avoid tag collisions. 
PiperOrigin-RevId: 171194758 --- .../learn/python/learn/estimators/head.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index a67694d1c9..468d792a0d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -33,7 +33,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import logging_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_lib @@ -635,10 +634,11 @@ def _create_model_fn_ops(features, if (mode != model_fn.ModeKeys.INFER) and (labels is not None): weight_tensor = _weight_tensor(features, weight_column_name) loss, weighted_average_loss = loss_fn(labels, logits, weight_tensor) - # Uses the deprecated API to set the tag explicitly. - # Without it, training and eval losses will show up in different graphs. - logging_ops.scalar_summary( - _summary_key(head_name, mkey.LOSS), weighted_average_loss) + # The name_scope escapism is needed to maintain the same summary tag + # after switching away from the now unsupported API. + with ops.name_scope(""): + summary_loss = array_ops.identity(weighted_average_loss) + summary.scalar(_summary_key(head_name, mkey.LOSS), summary_loss) if mode == model_fn.ModeKeys.TRAIN: if train_op_fn is None: @@ -1484,8 +1484,12 @@ class _LossOnlyHead(Head): loss = self._loss_fn() if isinstance(loss, list): loss = math_ops.add_n(loss) - logging_ops.scalar_summary( - _summary_key(self.head_name, mkey.LOSS), loss) + # The name_scope escapism is needed to maintain the same summary tag + # after switching away from the now unsupported API. 
+ with ops.name_scope(""): + summary_loss = array_ops.identity(loss) + summary.scalar(_summary_key(self.head_name, mkey.LOSS), + summary_loss) if mode == model_fn.ModeKeys.TRAIN: if train_op_fn is None: raise ValueError("train_op_fn can not be None in TRAIN mode") -- GitLab From 631d3434ff33debfd0bf46d9d8602172f549c82d Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Thu, 5 Oct 2017 12:58:51 -0700 Subject: [PATCH 074/909] Adds throttle_secs into run_master PiperOrigin-RevId: 171194766 --- tensorflow/python/estimator/training.py | 74 +++-- tensorflow/python/estimator/training_test.py | 268 +++++++++++++++++-- 2 files changed, 307 insertions(+), 35 deletions(-) diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 5c0ebbea35..64b014a6b5 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -519,23 +519,51 @@ class _TrainingExecutor(object): class NewCheckpointListener( basic_session_run_hooks.CheckpointSaverListener): - def __init__(self, estimator, eval_spec, max_training_steps): - # pylint: disable=protected-access - self._evaluator = _TrainingExecutor._Evaluator(estimator, eval_spec, - max_training_steps) - # pylint: enable=protected-access + def __init__(self, evaluator, eval_throttle_secs): + self._evaluator = evaluator + self._eval_throttle_secs = eval_throttle_secs + + def begin(self): + self._timer = basic_session_run_hooks.SecondOrStepTimer( + every_secs=self._eval_throttle_secs) def after_save(self, session, global_step_value): - del session, global_step_value - self._evaluator.evaluate_and_export() + del session # unused; required by signature.
+ + if self._timer.should_trigger_for_step(global_step_value): + self._timer.update_last_triggered_step(global_step_value) + self._evaluator.evaluate_and_export() + else: + logging.info( + 'Skip the current checkpoint eval due to throttle secs ' + '({} secs).'.format(self._eval_throttle_secs)) + + # Final export signal: For any eval result with global_step >= train + # max_steps, the evaluator will send the final export signal. There is a + # small chance that the Estimator.train stopping logic sees a different + # global_step value (due to global step race condition and the fact the + # saver sees a larger value for checkpoint saving), which does not end + # the training. When the training ends, a new checkpoint is generated, which + # triggers the listener again. So, it could be the case the final export is + # triggered twice. + # + # But here, throttle_secs will skip the next intermediate checkpoint and, + # so, the double final export chance is very small. + evaluator = _TrainingExecutor._Evaluator( + self._estimator, self._eval_spec, self._train_spec.max_steps) # When the underlying `Estimator` object saves a new checkpoint, we would # like this callback to be called so that evaluation and export can trigger. saving_listeners = [ - NewCheckpointListener(self._estimator, self._eval_spec, - self._train_spec.max_steps) + NewCheckpointListener(evaluator, self._eval_spec.throttle_secs) ] - return self._start_distributed_training(saving_listeners=saving_listeners) + self._start_distributed_training(saving_listeners=saving_listeners) + + if not evaluator.is_final_export_triggered: + logging.info('Training has already ended. But the last eval is skipped ' + 'due to eval throttle_secs.
Now evaluating the final ' + 'checkpoint.') + evaluator.evaluate_and_export() def run_evaluator(self): """Runs task evaluator.""" @@ -580,6 +608,11 @@ class _TrainingExecutor(object): max_steps=self._train_spec.max_steps, hooks=train_hooks) + # Final export signal: For any eval result with global_step >= train + # max_steps, the evaluator will send the final export signal. The + # _should_stop_local_train will then end the while True as the stopping + # condition is satisfied (both checks use the same global_step value, + # i.e., no race condition) metrics = evaluator.evaluate_and_export() if not metrics: @@ -656,6 +689,11 @@ class _TrainingExecutor(object): self._train_spec.max_steps) return + # Final export signal: For any eval result with global_step >= train + # max_steps, the evaluator will send the final export signal. The next + # iteration of while loop will end the continuous eval as the stopping + # condition is satisfied (both checks use the same global_step value, + # i.e., no race condition) start = time.time() latest_eval_result = evaluator.evaluate_and_export() @@ -673,10 +711,15 @@ class _TrainingExecutor(object): def __init__(self, estimator, eval_spec, max_training_steps): self._estimator = estimator self._eval_spec = eval_spec + self._is_final_export_triggered = False self._previous_ckpt_path = None self._last_warning_time = 0 self._max_training_steps = max_training_steps + @property + def is_final_export_triggered(self): + return self._is_final_export_triggered + def evaluate_and_export(self): """Evaluate and (maybe) export the current model. @@ -720,15 +763,16 @@ class _TrainingExecutor(object): 'Internal error: `Estimator.evaluate` result should have ' '`global_step` in result. Given {}'.format(eval_result)) - # TODO(isaprykin): There is a potential race condition here in the - # distributed setting. The worker job that performs training - # might stop at a later global step value than the evalutor job. 
is_the_final_export = (eval_result[ops.GraphKeys.GLOBAL_STEP] >= self._max_training_steps if self._max_training_steps else False) self._export_eval_result(eval_result, latest_ckpt_path, is_the_final_export) + if is_the_final_export: + logging.debug('Calling exporter with the `is_the_final_export=True`.') + self._is_final_export_triggered = True + self._last_warning_time = 0 self._previous_ckpt_path = latest_ckpt_path return eval_result @@ -749,8 +793,8 @@ class _TrainingExecutor(object): for exporter in self._eval_spec.exporters: exporter.export( - self._estimator, - os.path.join( + estimator=self._estimator, + export_path=os.path.join( compat.as_str_any(export_dir_base), compat.as_str_any(exporter.name)), checkpoint_path=checkpoint_path, diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py index 40972ab5a0..8c00ebddf3 100644 --- a/tensorflow/python/estimator/training_test.py +++ b/tensorflow/python/estimator/training_test.py @@ -45,6 +45,7 @@ from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import summary_iterator from tensorflow.python.summary.writer import writer_cache +from tensorflow.python.training import basic_session_run_hooks from tensorflow.python.training import monitored_session from tensorflow.python.training import server_lib from tensorflow.python.training import session_run_hook @@ -692,37 +693,145 @@ class TrainingExecutorRunChiefTest(_TrainingExecutorTrainingTest, mock_sleep.assert_not_called() -class TrainingExecutorRunMasterTest(_TrainingExecutorTrainingTest, - test.TestCase): +class TrainingExecutorRunMasterTest(test.TestCase): """Tests run_chief of _TrainingExecutor.""" - def __init__(self, methodName='runTest'): # pylint: disable=invalid-name - test.TestCase.__init__(self, methodName) - _TrainingExecutorTrainingTest.__init__( - self, - run_config=_create_run_config_with_cluster_spec(_TF_CONFIG_FOR_MASTER)) 
+ def setUp(self): + self._run_config = _create_run_config_with_cluster_spec( + _TF_CONFIG_FOR_MASTER) @test.mock.patch.object(server_lib, 'Server') def test_no_delay_for_master(self, _): mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123} mock_est.config = self._run_config mock_train_spec = test.mock.Mock(spec=training.TrainSpec) - mock_eval_spec = test.mock.Mock(spec=training.EvalSpec) + mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[]) executor = training._TrainingExecutor(mock_est, mock_train_spec, mock_eval_spec) with test.mock.patch.object(time, 'sleep') as mock_sleep: - self._run_task(executor) + executor.run_master() mock_sleep.assert_not_called() + @test.mock.patch.object(time, 'sleep') + @test.mock.patch.object(server_lib, 'Server') + def test_train_with_train_spec(self, mock_server, unused_mock_sleep): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123} + mock_est.config = self._run_config + train_spec = training.TrainSpec( + input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()]) + mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[]) + mock_server_instance = mock_server.return_value + + executor = training._TrainingExecutor(mock_est, train_spec, mock_eval_spec) + executor.run_master() + + mock_server.assert_called_with( + mock_est.config.cluster_spec, + job_name=mock_est.config.task_type, + task_index=mock_est.config.task_id, + config=test.mock.ANY, + start=False) + + self.assertTrue(mock_server_instance.start.called) + + mock_est.train.assert_called_with(input_fn=train_spec.input_fn, + max_steps=train_spec.max_steps, + hooks=train_spec.hooks, + saving_listeners=test.mock.ANY) + mock_est.export_savedmodel.assert_not_called() + + @test.mock.patch.object(time, 'sleep') + @test.mock.patch.object(server_lib, 'Server') + def test_no_server_startup_in_google(self, 
mock_server, unused_mock_sleep): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123} + mock_est.config = self._run_config + mock_train_spec = test.mock.Mock(spec=training.TrainSpec) + mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[]) + + executor = training._TrainingExecutor(mock_est, mock_train_spec, + mock_eval_spec) + tf_config = {'TF_CONFIG': json.dumps(_TF_CONFIG_FOR_GOOGLE)} + with test.mock.patch.dict('os.environ', tf_config): + executor.run_master() + mock_server.assert_not_called() + + def test_fail_with_empty_cluster_spec(self): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_train_spec = test.mock.Mock(spec=training.TrainSpec) + mock_eval_spec = test.mock.Mock(spec=training.EvalSpec) + + mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig) + mock_est.config.cluster_spec = None + mock_est.config.master = 'grpc://...' + mock_est.config.task_type = 'worker' + mock_est.config.task_id = 2 + + with self.assertRaisesRegexp(RuntimeError, + _INVALID_CONFIG_FOR_STD_SERVER_MSG): + training._TrainingExecutor( + mock_est, mock_train_spec, mock_eval_spec).run_master() + + def test_fail_with_empty_master(self): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_train_spec = test.mock.Mock(spec=training.TrainSpec) + mock_eval_spec = test.mock.Mock(spec=training.EvalSpec) + + mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig) + mock_est.config.cluster_spec = {'worker': 'dummy'} + mock_est.config.master = '' + mock_est.config.task_type = 'worker' + mock_est.config.task_id = 2 + + with self.assertRaisesRegexp(RuntimeError, + _INVALID_CONFIG_FOR_STD_SERVER_MSG): + training._TrainingExecutor( + mock_est, mock_train_spec, mock_eval_spec).run_master() + + def test_fail_with_empty_task_type(self): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_train_spec = 
test.mock.Mock(spec=training.TrainSpec) + mock_eval_spec = test.mock.Mock(spec=training.EvalSpec) + + mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig) + mock_est.config.cluster_spec = {'worker': 'dummy'} + mock_est.config.master = 'grpc://...' + mock_est.config.task_type = '' + mock_est.config.task_id = 2 + + with self.assertRaisesRegexp(RuntimeError, + _INVALID_CONFIG_FOR_STD_SERVER_MSG): + training._TrainingExecutor( + mock_est, mock_train_spec, mock_eval_spec).run_master() + + def test_fail_with_none_task_id(self): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator) + mock_train_spec = test.mock.Mock(spec=training.TrainSpec) + mock_eval_spec = test.mock.Mock(spec=training.EvalSpec) + + mock_est.config = test.mock.PropertyMock(spec=run_config_lib.RunConfig) + mock_est.config.cluster_spec = {'worker': 'dummy'} + mock_est.config.master = 'grpc://...' + mock_est.config.task_type = 'worker' + mock_est.config.task_id = None + + with self.assertRaisesRegexp(RuntimeError, + _INVALID_CONFIG_FOR_STD_SERVER_MSG): + training._TrainingExecutor( + mock_est, mock_train_spec, mock_eval_spec).run_master() + @test.mock.patch.object(server_lib, 'Server') - def test_run_master_triggers_evaluate(self, _): + def test_run_master_triggers_evaluate_and_export(self, _): def estimator_train(saving_listeners, *args, **kwargs): # There shalt be a saving_listener. Estimator is going to call # `after_save`. 
del args, kwargs + saving_listeners[0].begin() saving_listeners[0].after_save(session=None, global_step_value=None) mock_est = test.mock.Mock( @@ -730,18 +839,14 @@ class TrainingExecutorRunMasterTest(_TrainingExecutorTrainingTest, mock_est.latest_checkpoint.return_value = 'checkpoint_path/' mock_est.config = self._run_config - def export(estimator, *args, **kwargs): - del args, kwargs - estimator.export_was_called = True - exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) exporter.name = 'see_whether_export_is_called' - exporter.export = export train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300) eval_spec = training.EvalSpec( input_fn=lambda: 1, steps=2, exporters=exporter) - mock_est.evaluate.return_value = {_GLOBAL_STEP_KEY: train_spec.max_steps} + eval_result = {_GLOBAL_STEP_KEY: train_spec.max_steps} + mock_est.evaluate.return_value = eval_result executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) executor.run_master() @@ -752,7 +857,109 @@ class TrainingExecutorRunMasterTest(_TrainingExecutorTrainingTest, steps=eval_spec.steps, checkpoint_path='checkpoint_path/', hooks=eval_spec.hooks) - self.assertTrue(mock_est.export_was_called) + self.assertEqual(1, exporter.export.call_count) + exporter.export.assert_called_with( + estimator=mock_est, + export_path=os.path.join('path/', 'export', exporter.name), + checkpoint_path='checkpoint_path/', + eval_result=eval_result, + is_the_final_export=True) + + @test.mock.patch.object(basic_session_run_hooks, 'SecondOrStepTimer') + @test.mock.patch.object(server_lib, 'Server') + def test_run_master_throttle_eval(self, _, mock_timer_class): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') + + mock_timer = test.mock.Mock() + mock_timer_class.return_value = mock_timer + + def estimator_train(saving_listeners, *args, **kwargs): + del args, kwargs + saving_listeners[0].begin() + + # Call three times. 
+ mock_timer.should_trigger_for_step.return_value = True + saving_listeners[0].after_save(session=None, global_step_value=None) + + mock_timer.should_trigger_for_step.return_value = False + saving_listeners[0].after_save(session=None, global_step_value=None) + + mock_timer.should_trigger_for_step.return_value = True + saving_listeners[0].after_save(session=None, global_step_value=None) + + mock_est.train = estimator_train + mock_est.latest_checkpoint.side_effect = ['ckpt1', 'ckpt2'] + mock_est.config = self._run_config + + exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) + exporter.name = 'see_whether_export_is_called' + + train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300) + eval_spec = training.EvalSpec( + input_fn=lambda: 1, steps=2, exporters=exporter, throttle_secs=10) + + mock_est.evaluate.side_effect = [ + {_GLOBAL_STEP_KEY: train_spec.max_steps //2}, + {_GLOBAL_STEP_KEY: train_spec.max_steps} + ] + + executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) + executor.run_master() + + self.assertEqual(2, mock_est.evaluate.call_count) + self.assertEqual(2, exporter.export.call_count) + + is_final_export_list = [call[1]['is_the_final_export'] + for call in exporter.export.call_args_list] + self.assertEqual([False, True], is_final_export_list) + + @test.mock.patch.object(basic_session_run_hooks, 'SecondOrStepTimer') + @test.mock.patch.object(server_lib, 'Server') + def test_run_master_throttle_eval_which_skips_final_ckpt( + self, _, mock_timer_class): + mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') + + mock_timer = test.mock.Mock() + mock_timer_class.return_value = mock_timer + + def estimator_train(saving_listeners, *args, **kwargs): + del args, kwargs + saving_listeners[0].begin() + + # Call two times. + mock_timer.should_trigger_for_step.return_value = True + saving_listeners[0].after_save(session=None, global_step_value=None) + + # The final ckpt is skipped by the timer. 
It will be picked up by the final + # export check in the code. + mock_timer.should_trigger_for_step.return_value = False + saving_listeners[0].after_save(session=None, global_step_value=None) + + mock_est.train = estimator_train + mock_est.latest_checkpoint.side_effect = ['ckpt1', 'ckpt2'] + mock_est.config = self._run_config + + exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) + exporter.name = 'see_whether_export_is_called' + + train_spec = training.TrainSpec(input_fn=lambda: 1, max_steps=300) + eval_spec = training.EvalSpec( + input_fn=lambda: 1, steps=2, exporters=exporter, throttle_secs=10) + + mock_est.evaluate.side_effect = [ + {_GLOBAL_STEP_KEY: train_spec.max_steps //2}, + {_GLOBAL_STEP_KEY: train_spec.max_steps} + ] + + executor = training._TrainingExecutor(mock_est, train_spec, eval_spec) + executor.run_master() + + self.assertEqual(2, mock_est.evaluate.call_count) + self.assertEqual(2, exporter.export.call_count) + + is_final_export_list = [call[1]['is_the_final_export'] + for call in exporter.export.call_args_list] + self.assertEqual([False, True], is_final_export_list) class TrainingExecutorRunEvaluatorTest(test.TestCase): @@ -803,6 +1010,19 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase): exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) exporter.name = 'see_how_many_times_export_is_called' + mock_est.times_export_was_called = 0 + mock_est.times_final_export_was_called = 0 + def export(estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): + del export_path, checkpoint_path, eval_result + estimator.times_export_was_called += 1 + # final_export happens at the end.
+ self.assertEqual(0, estimator.times_final_export_was_called) + if is_the_final_export: + estimator.times_final_export_was_called += 1 + + exporter.export = export + eval_spec = training.EvalSpec( input_fn=lambda: 1, start_delay_secs=0, @@ -813,7 +1033,8 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase): executor.run_evaluator() self.assertEqual(2, mock_est.evaluate.call_count) - self.assertEqual(2, exporter.export.call_count) + self.assertEqual(2, mock_est.times_export_was_called) + self.assertEqual(1, mock_est.times_final_export_was_called) def test_final_export_is_true_in_the_end(self): training_max_step = 200 @@ -1135,9 +1356,15 @@ class TrainingExecutorRunLocalTest(test.TestCase): mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn mock_est.times_export_was_called = 0 - def export(estimator, *args, **kwargs): - del args, kwargs + mock_est.times_final_export_was_called = 0 + def export(estimator, export_path, checkpoint_path, eval_result, + is_the_final_export): + del export_path, checkpoint_path, eval_result estimator.times_export_was_called += 1 + # final_export happens at the end. + self.assertEqual(0, estimator.times_final_export_was_called) + if is_the_final_export: + estimator.times_final_export_was_called += 1 exporter = test.mock.PropertyMock(spec=exporter_lib.Exporter) exporter.name = 'see_how_many_times_export_is_called' @@ -1165,6 +1392,7 @@ class TrainingExecutorRunLocalTest(test.TestCase): self.assertEqual(3, mock_est.train.call_count) self.assertEqual(3, mock_est.evaluate.call_count) self.assertEqual(3, mock_est.times_export_was_called) + self.assertEqual(1, mock_est.times_final_export_was_called) def test_handles_no_new_checkpoint_found(self): mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/') -- GitLab From c8b3f67ba3f8895ebaf0cc78f1859a604ac68c16 Mon Sep 17 00:00:00 2001 From: "A.
Unique TensorFlower" Date: Thu, 5 Oct 2017 13:01:02 -0700 Subject: [PATCH 075/909] Fix checkpoint_path is None handling in export_fn of make_best_model_export_strategy. PiperOrigin-RevId: 171195079 --- .../learn/python/learn/utils/saved_model_export_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index ee8856ac34..5975103f4f 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -50,6 +50,7 @@ from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils +from tensorflow.python.training import saver from tensorflow.python.util import compat @@ -616,7 +617,13 @@ def make_best_model_export_strategy(serving_input_fn, Returns: The string path to the exported directory. """ - + if not checkpoint_path: + # TODO(b/67425018): switch to + # checkpoint_path = estimator.latest_checkpoint() + # as soon as contrib is cleaned up and we can thus be sure that + # estimator is a tf.estimator.Estimator and not a + # tf.contrib.learn.Estimator + checkpoint_path = saver.latest_checkpoint(estimator.model_dir) export_checkpoint_path, export_eval_result = best_model_selector.update( checkpoint_path, eval_result) -- GitLab From b56568b8db2b5cfedf53d92ddcff13e3603fbc29 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 5 Oct 2017 13:31:18 -0700 Subject: [PATCH 076/909] Disable six tests. One is too big, three are broken due to known matrix_set_diag issues on windows, one is failing due to numerical discrepancies between OSs, and one is broken when multiple GPUs are present. 
PiperOrigin-RevId: 171199546 --- tensorflow/contrib/cmake/tf_tests.cmake | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 55d57b7574..4cf22a9c47 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -229,6 +229,8 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/kernel_tests/cholesky_op_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/diag_op_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/linalg_ops_test.py" + "${tensorflow_source_dir}/tensorflow/python/ops/init_ops.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py" # misc "${tensorflow_source_dir}/tensorflow/python/kernel_tests/variable_scope_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/reshape_op_test.py" @@ -244,6 +246,9 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/training/supervisor_test.py" # Flaky I/O error on rename. "${tensorflow_source_dir}/tensorflow/python/training/sync_replicas_optimizer_test.py" # Needs portpicker. "${tensorflow_source_dir}/tensorflow/python/training/server_lib_test.py" # Test occasionally deadlocks. + "${tensorflow_source_dir}/tensorflow/python/debug/lib/session_debug_multi_gpu_test.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/concat_op_test.py" # numerical issues + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/linalg_grad_test.py" # cudaSolver handle creation fails. 
"${tensorflow_source_dir}/tensorflow/python/kernel_tests/array_ops_test.py" # depends on python/framework/test_ops # Dataset tests @@ -303,6 +308,7 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_test.py" # Test should only be run manually "${tensorflow_source_dir}/tensorflow/python/kernel_tests/reduction_ops_test_big.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/svd_op_test.py" ) endif() list(REMOVE_ITEM tf_test_src_py ${tf_test_src_py_exclude}) -- GitLab From 94b81fabaedc85a143fca37304b5b143f936f541 Mon Sep 17 00:00:00 2001 From: Mike Case Date: Thu, 5 Oct 2017 13:38:39 -0700 Subject: [PATCH 077/909] Make GCS and HDFS default build options. --- configure.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.py b/configure.py index 9ca614f8f9..6d22d33b99 100644 --- a/configure.py +++ b/configure.py @@ -988,9 +988,9 @@ def main(): set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc', 'with_jemalloc', True) set_build_var(environ_cp, 'TF_NEED_GCP', 'Google Cloud Platform', - 'with_gcp_support', False, 'gcp') + 'with_gcp_support', True, 'gcp') set_build_var(environ_cp, 'TF_NEED_HDFS', 'Hadoop File System', - 'with_hdfs_support', False, 'hdfs') + 'with_hdfs_support', True, 'hdfs') set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', False, 'xla') set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', -- GitLab From 2198b8cfe8acb5af7bb5a1dac54c18ff72c98002 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Thu, 5 Oct 2017 13:41:54 -0700 Subject: [PATCH 078/909] Minimize python code in supporting TF_Function. After this change when C API is enabled, function support in Python is done with minimal use of Python code. In particular, we don't create or store FunctionDef in Python. Small changes include: - We don't use _hash_str for function comparisons in Python. 
Instead, we delegate this logic to TF_GraphCopyFunction in C API. - We move the check for duplicate function additions from _DefinedFunction.add_to_graph(graph) to Graph._add_function in all cases. This is more logical and makes it easier to support both modes. - We change some error messages to be the same in both modes. - Since we don't store FunctionDef in C API mode in Python but get it on demand, access to common attributes like name or signature can become expensive. To mitigate this, we cache the signature (OpDef) of the function in Python. Signatures are generally much smaller than whole definitions. - Add context manager for creating and destroying TF_Buffers. - Allow zero output tensorflow functions in Python The C API and C++ runtime support functions without outputs, but Python APIs explicitly disallowed them before this change. This change allows zero output functions in Python and cleans some hacks that were added to side-step regular Python function APIs before. PiperOrigin-RevId: 171201162 --- tensorflow/compiler/tests/jit_test.py | 27 +-- tensorflow/python/framework/c_api_util.py | 23 +++ tensorflow/python/framework/function.py | 160 ++++++++++++------ tensorflow/python/framework/function_test.py | 40 ++--- .../python/framework/graph_to_function_def.py | 9 +- tensorflow/python/framework/ops.py | 32 +++- 6 files changed, 180 insertions(+), 111 deletions(-) diff --git a/tensorflow/compiler/tests/jit_test.py b/tensorflow/compiler/tests/jit_test.py index 11914080ec..2d8236e2cb 100644 --- a/tensorflow/compiler/tests/jit_test.py +++ b/tensorflow/compiler/tests/jit_test.py @@ -21,15 +21,12 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.compiler import jit -from tensorflow.core.framework import function_pb2 -from tensorflow.core.framework import node_def_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session as session_lib from tensorflow.python.framework import constant_op from 
tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients_impl @@ -118,31 +115,13 @@ class JitLaunchTest(test.TestCase): def testNoOutputs(self): with session_lib.Session() as sess: - # Build a function with a single Const node, whose output is ignored. - fdef = function_pb2.FunctionDef() - fdef.signature.name = "KernelWithNoOutputs" - node = node_def_pb2.NodeDef() - node.op = "Const" - node.name = "ignored" - node.attr["dtype"].type = dtypes.int32.as_datatype_enum - tensor = tensor_util.make_tensor_proto([0], dtype=dtypes.int32, shape=[]) - node.attr["value"].tensor.CopyFrom(tensor) - fdef.node_def.extend([node]) # Check that calling the result as a compiled kernel doesn't crash. @function.Defun(compiled=True) def KernelWithNoOutputs(): - return constant_op.constant(100) - - # Hack to override the definition. By accessing .definition, we - # force the _DefinedFunction initialized internally. Then, we - # replace it's internal FunctionDef proto. We do this hack here - # because one typically can't construct KernelWithNoOutputs - # function via Defun decorator directly. 
- _ = KernelWithNoOutputs.definition - foo = KernelWithNoOutputs - foo._definition = fdef - call = KernelWithNoOutputs() + a = constant_op.constant(100) # pylint: disable=unused-variable + + call = KernelWithNoOutputs() # pylint: disable=assignment-from-no-return sess.run(call, {}) def testAliasing(self): diff --git a/tensorflow/python/framework/c_api_util.py b/tensorflow/python/framework/c_api_util.py index 379ba19def..ddababd5b8 100644 --- a/tensorflow/python/framework/c_api_util.py +++ b/tensorflow/python/framework/c_api_util.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python import pywrap_tensorflow as c_api +from tensorflow.python.util import tf_contextlib class ScopedTFStatus(object): @@ -46,3 +47,25 @@ class ScopedTFGraph(object): # terminating) we can have already deleted other modules. if c_api.TF_DeleteGraph is not None: c_api.TF_DeleteGraph(self.graph) + + +@tf_contextlib.contextmanager +def tf_buffer(): + """Context manager that creates and deletes TF_Buffer. + + Example usage: + with tf_buffer() as buf: + # get serialized graph def into buf + ... 
+ proto_data = c_api.TF_GetBuffer(buf) + graph_def.ParseFromString(compat.as_bytes(proto_data)) + # buf has been deleted + + Yields: + Created TF_Buffer + """ + buf = c_api.TF_NewBuffer() + try: + yield buf + finally: + c_api.TF_DeleteBuffer(buf) diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index 068e3125aa..7068e72009 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -25,8 +25,10 @@ import collections import hashlib from tensorflow.core.framework import attr_value_pb2 +from tensorflow.core.framework import function_pb2 from tensorflow.python import pywrap_tensorflow as c_api from tensorflow.python.eager import context +from tensorflow.python.framework import c_api_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import graph_to_function_def @@ -242,9 +244,17 @@ class _DefinedFunction(object): self._shape_func = shape_func self._capture_by_value = capture_by_value self._extra_kwargs = kwargs - self._definition = None # Constructed lazily. - self._c_func = None # Constructed with definition. - self._sub_functions = dict() # Constructed with definition. + # Constructed only when C API is disabled, lazily + self._definition = None + # Constructed only when C API is enabled, lazily + self._c_func = None + self._sub_functions = dict() # Constructed with _definition or _c_func + + # Cached OpDef for this function. When C API is enabled, this is + # the only part of FunctionDef that we cache in Python. 
When C API + # is disabled the whole _definition is available and this is simply + # another reference to _definition.signature + self._op_def = None self._args = [] assert isinstance(input_types, (list, tuple)) @@ -263,8 +273,21 @@ class _DefinedFunction(object): def definition(self): """Function definition proto.""" self._create_definition_if_needed() + if self._c_func: + with c_api_util.tf_buffer() as buf: + with errors.raise_exception_on_not_ok_status() as status: + c_api.TF_FunctionToFunctionDef(self._c_func, buf, status) + fdef = function_pb2.FunctionDef() + proto_data = c_api.TF_GetBuffer(buf) + fdef.ParseFromString(compat.as_bytes(proto_data)) + return fdef return self._definition + @property + def _signature(self): + self._create_definition_if_needed() + return self._op_def + def set_grad_func(self, grad_func): """Specifies the gradient function of this function.""" assert not self._grad_func @@ -299,7 +322,7 @@ class _DefinedFunction(object): def _create_definition_if_needed_impl(self): """This is not what you want, see _create_definition_if_needed.""" - if self._definition is not None: + if self._definition is not None or self._c_func is not None: return # Create the func_def object. @@ -313,11 +336,23 @@ class _DefinedFunction(object): # Call func and gather the output tensors. with vs.variable_scope("", custom_getter=temp_graph.getvar): outputs = self._func(*inputs) - # If func only returned one value, make it a tuple. - if not isinstance(outputs, (list, tuple)): - outputs = (outputs,) - if any([_ is None for _ in outputs]): - raise ValueError("Function can not return None.") + + # There is no way of distinguishing between a function not returning + # anything and a function returning None in Python. + # We need to allow the former and ideally want to forbid the latter as + # it is most likely user error. + # TODO(iga): Consider adding a @NoOutput decorator on top of @Defun to + # allow users to explicitly mark the function as not returning anything. 
+ # For now, we allow a single None return and interpret it as a function + # with no output. + if outputs is None: + outputs = [] + else: + # If func only returned one value, make it a tuple. + if not isinstance(outputs, (list, tuple)): + outputs = (outputs,) + if any([_ is None for _ in outputs]): + raise ValueError("Function can not return None.") # Ensures each output is a Tensor. outputs = [ops.convert_to_tensor(_) for _ in outputs] self._extra_inputs = temp_graph.extra_inputs @@ -326,44 +361,47 @@ class _DefinedFunction(object): self._sub_functions = temp_graph._functions # pylint: enable=protected-access - # Build the FunctionDef - self._definition = graph_to_function_def.graph_to_function_def( - temp_graph, - temp_graph.get_operations(), - inputs, - outputs, - out_names=self._out_names) - # Extra kwargs are treated as attrs on the function def. - sig_pre_func_name = self._func_name or _get_func_name(self._func) - kwargs_attr = _parse_kwargs_as_attrs(sig_pre_func_name, + base_func_name = self._func_name or _get_func_name(self._func) + kwargs_attr = _parse_kwargs_as_attrs(base_func_name, **self._extra_kwargs) - for k in kwargs_attr: - self._definition.attr[k].CopyFrom(kwargs_attr[k]) - - # Hash the definition and its dependencies. - self._hash_str = self._create_hash_str( - self._definition.signature.input_arg, - self._definition.signature.output_arg, self._definition.node_def) - - # Finally, we decide the function name to use. If not specified, - # make up something which is almost certainly unique (but deterministic). 
- if not self._func_name: - self._func_name = "_".join([_get_func_name(self._func), self._hash_str]) - self._definition.signature.name = self._func_name - if self._func.__doc__: - self._definition.signature.description = self._func.__doc__ - # pylint: disable=protected-access - if temp_graph._c_graph: + if not temp_graph._c_graph: # pylint: disable=protected-access + # Build the FunctionDef + self._definition = graph_to_function_def.graph_to_function_def( + temp_graph, + temp_graph.get_operations(), + inputs, + outputs, + out_names=self._out_names) + + for k in kwargs_attr: + self._definition.attr[k].CopyFrom(kwargs_attr[k]) + + # Hash the definition and its dependencies. + self._hash_str = self._create_hash_str( + self._definition.signature.input_arg, + self._definition.signature.output_arg, self._definition.node_def) + + # Finally, we decide the function name to use. If not specified, + # make up something which is almost certainly unique (but deterministic). + if not self._func_name: + self._func_name = "_".join([base_func_name, self._hash_str]) + self._definition.signature.name = self._func_name + if self._func.__doc__: + self._definition.signature.description = self._func.__doc__ + + self._op_def = self._definition.signature + else: # C API is enabled output_names = ([compat.as_bytes(x) for x in self._out_names] if self._out_names else []) description = self._func.__doc__ or None + # pylint: disable=protected-access with errors.raise_exception_on_not_ok_status() as status: self._c_func = c_api.TF_GraphToFunction_wrapper( temp_graph._c_graph, - self._func_name, - False, # append_hash_to_fn_name + base_func_name, + self._func_name is None, # append_hash_to_fn_name None, # opers [t._as_tf_output() for t in inputs], [t._as_tf_output() for t in outputs], @@ -371,8 +409,15 @@ class _DefinedFunction(object): None, # opts description, status) + # pylint: enable=protected-access self._set_c_attrs(kwargs_attr) - # pylint: enable=protected-access + + # Set cached fields: 
_op_def and _func_name (if not already set) + self._op_def = self.definition.signature + if self._func_name: + assert self._func_name == self._op_def.name + else: + self._func_name = self._op_def.name def _set_c_attrs(self, attrs): """Sets `attrs` as attributes of self._c_func. @@ -440,13 +485,8 @@ class _DefinedFunction(object): """Adds this function into the graph g.""" self._create_definition_if_needed() - # pylint: disable=protected-access - # If 'g' has an identical function already, do nothing. - prev = g._get_function(self.name) - if prev and (prev._hash_str == self._hash_str): - return - # Adds this function into 'g'. + # pylint: disable=protected-access if context.in_graph_mode(): g._add_function(self) else: @@ -464,7 +504,7 @@ class _DefinedFunction(object): def __call__(self, *args, **kwargs): self.add_to_graph(ops.get_default_graph()) args = [ops.convert_to_tensor(_) for _ in args] + self._extra_inputs - ret, op = _call(self._definition.signature, *args, **kwargs) + ret, op = _call(self._signature, *args, **kwargs) if self._shape_func is not None: shapes = self._shape_func(op) if len(shapes) != len(op.outputs): @@ -554,7 +594,7 @@ class _OverloadedFunction(object): # right input types. output_types = [ dtypes.DType(_.type) - for _ in defined.definition.signature.output_arg + for _ in defined._signature.output_arg # pylint: disable=protected-access ] # pylint: disable=protected-access defined._grad_func = self._grad_func.instantiate( @@ -759,6 +799,9 @@ def _from_definition(fdef, grad_func=None): Returns: A _DefinedFunction representing fdef """ + # TODO(iga): This method does major surgery on _DefinedFunction. + # Make it a named constructor using @classmethod of _DefinedFunction. + # The Python callable is only needed to create a FunctionDef. Since we have # the FunctionDef here, we don't need to set _DefinedFunction._func (nor do we # have access to such a callable here). 
@@ -774,15 +817,22 @@ def _from_definition(fdef, grad_func=None): result = _DefinedFunction(func, argnames, input_types, func_name, grad_func, python_grad_func, out_names) # pylint: disable=protected-access - result._definition = fdef - # Captured inputs are added as regular inputs to a function when it's - # serialized, i.e. any extra inputs from the original function are now - # included in `result`._args - result._extra_inputs = [] - result._hash_str = result._create_hash_str( - result._definition.signature.input_arg, - result._definition.signature.output_arg, result._definition.node_def) + if ops._USE_C_API: + serialized = fdef.SerializeToString() + with errors.raise_exception_on_not_ok_status() as status: + result._c_func = c_api.TF_FunctionImportFunctionDef(serialized, status) + result._extra_inputs = [] + else: + result._definition = fdef + # Captured inputs are added as regular inputs to a function when it's + # serialized, i.e. any extra inputs from the original function are now + # included in `result`._args + result._extra_inputs = [] + result._hash_str = result._create_hash_str( + result._definition.signature.input_arg, + result._definition.signature.output_arg, result._definition.node_def) # pylint: enable=protected-access + return result diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 3c359b8700..fea2129922 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -107,8 +107,9 @@ class FunctionTest(test.TestCase): with ops.Graph().as_default(): with self.assertRaisesRegexp( - ValueError, (r"Length of out_names \(2\) does not match number of " - r"outputs \(1\): my_result1, my_result2")): + errors_impl.InvalidArgumentError, + (r"output names must be either empty or equal in size to outputs. 
" + "output names size = 2 outputs size = 1")): MyIdentityFunc([18.0]) def testDefineFunction2Args(self): @@ -123,18 +124,16 @@ class FunctionTest(test.TestCase): with session.Session() as sess: self.assertAllEqual([5.0], sess.run(call)) - def testValueErrorOnFunctionWithNoOutput(self): - # TODO(iga): Remove this restriction and this test + def testFunctionWithNoOutput(self): @function.Defun(dtypes.float32, dtypes.float32) def APlus2B(a, b): - print(a + b * 2) # Create some ops to have nodes in the body - # Using 'print' to make lint happy + c = a + b * 2 # Create some ops to have nodes in the body + print(c) # Using 'print' to make lint happy with ops.Graph().as_default(): - with self.assertRaisesRegexp(ValueError, - "Function can not return None"): - APlus2B([1.0], [2.0]) + # Call function. There should be no exceptions. + APlus2B([1.0], [2.0]) def testDefineFunction2ArgsOutputName(self): @@ -499,14 +498,6 @@ class FunctionTest(test.TestCase): def testDefineErrors(self): with ops.Graph().as_default(): - with self.assertRaisesRegexp(ValueError, "can not return None"): - - @function.Defun() - def NoResult(): - pass - - _ = NoResult.definition - with self.assertRaisesRegexp(ValueError, "can not return None"): @function.Defun() @@ -730,7 +721,14 @@ class FunctionTest(test.TestCase): def Foo(x, y, z): return math_ops.tanh(math_ops.matmul(x, y) + z) - self.assertEqual("Foo_d643acf7", Foo.instantiate([dtypes.float32] * 3).name) + # We added more randomness to function names in C API. + # TODO(iga): Remove this if statement when we switch to C API. 
+ if ops._USE_C_API: # pylint: disable=protected-access + self.assertEqual("Foo_aCYSbwBkR5A", + Foo.instantiate([dtypes.float32] * 3).name) + else: + self.assertEqual("Foo_d643acf7", + Foo.instantiate([dtypes.float32] * 3).name) def testSignatureHash(self): # Foo.Inner and Bar.Inner have identical function body but have @@ -1007,7 +1005,8 @@ class FunctionsFromProtos(test.TestCase): library.function.extend([F1.definition]) with self.assertRaisesRegexp( - ValueError, "FunctionDefLibrary missing 'G1_........' FunctionDef"): + ValueError, + "FunctionDefLibrary missing 'G1_[0-9a-zA-Z]{8,11}' FunctionDef"): function._from_library(library) # Create invalid function def that is missing F1 function def @@ -1016,7 +1015,8 @@ class FunctionsFromProtos(test.TestCase): library.function.extend([G1.definition]) with self.assertRaisesRegexp( - ValueError, "FunctionDefLibrary missing 'F1_........' FunctionDef"): + ValueError, + "FunctionDefLibrary missing 'F1_[0-9a-zA-Z]{8,11}' FunctionDef"): function._from_library(library) def testFromLibraryCyclicGradFuncs(self): diff --git a/tensorflow/python/framework/graph_to_function_def.py b/tensorflow/python/framework/graph_to_function_def.py index 33a417a1da..448f87aa6e 100644 --- a/tensorflow/python/framework/graph_to_function_def.py +++ b/tensorflow/python/framework/graph_to_function_def.py @@ -22,6 +22,7 @@ import re from tensorflow.core.framework import function_pb2 from tensorflow.core.framework import op_def_pb2 +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import op_def_registry @@ -151,9 +152,11 @@ def graph_to_function_def(graph, operations, inputs, outputs, out_names=None): func.signature.output_arg.extend( [_tensor_to_argdef(o, used_names=used_names) for o in outputs]) elif len(outputs) != len(out_names): - raise ValueError( - "Length of out_names (%d) does not match number of outputs (%d): %s" % - (len(out_names), len(outputs), ", ".join(out_names))) + raise 
errors_impl.InvalidArgumentError( + None, None, + "output names must be either empty or equal in size to outputs. " + "output names size = %d outputs size = %d" % + (len(out_names), len(outputs))) elif len(out_names) != len(set(out_names)): raise ValueError( "Must not have duplicates in out_names: %s" % ", ".join(out_names)) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index ae84297690..e6e6b9c6ca 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -2808,19 +2808,14 @@ class Graph(object): ValueError: if another function is defined with the same name. """ name = function.name - previous = self._functions.get(name, None) - if previous: - raise ValueError("Another function is already defined with that name") # Sanity checks on gradient definition. if (function.grad_func_name is not None) and (function.python_grad_func is not None): raise ValueError("Gradient defined twice for function %s" % name) - # Need a new-enough consumer to support the functions we add to the graph. - if self._graph_def_versions.min_consumer < 12: - self._graph_def_versions.min_consumer = 12 - self._functions[name] = function + + # Add function to graph + # pylint: disable=protected-access if self._c_graph: - # pylint: disable=protected-access assert function._c_func, ( "Cannot add function created without C API support to graph " "created with C API support") @@ -2828,7 +2823,26 @@ class Graph(object): gradient = function._grad_func._c_func if function._grad_func else None c_api.TF_GraphCopyFunction(self._c_graph, function._c_func, gradient, status) - # pylint: enable=protected-access + else: + # If there is already a function with the same name, raise an error + # if bodies are different. Else, do nothing. The C API version above + # has the same behavior. 
+ previous = self._functions.get(name, None) + if previous: + # This check is not ideal as we can have a hash collision with only + # 32 bits in the hash, but the non C API mode is being deprecated. + # Don't bother changing it now. + if previous._hash_str == function._hash_str: + return + else: + raise ValueError("Another function is already defined with that name") + # pylint: enable=protected-access + + self._functions[name] = function + + # Need a new-enough consumer to support the functions we add to the graph. + if self._graph_def_versions.min_consumer < 12: + self._graph_def_versions.min_consumer = 12 @property def building_function(self): -- GitLab From 83b5768431bb06d749cf67ab64d9cd3fd36ec943 Mon Sep 17 00:00:00 2001 From: Fan Xia Date: Thu, 5 Oct 2017 14:22:01 -0700 Subject: [PATCH 079/909] Make code Python 2 and 3 compatible (#13489) Update the Python implementation so that both Python 2 and Python 3 environment can execute --- tensorflow/docs_src/get_started/estimator.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/docs_src/get_started/estimator.md b/tensorflow/docs_src/get_started/estimator.md index 4f3a438d17..11c3dc6e53 100644 --- a/tensorflow/docs_src/get_started/estimator.md +++ b/tensorflow/docs_src/get_started/estimator.md @@ -28,7 +28,7 @@ from __future__ import division from __future__ import print_function import os -import urllib +from six.moves.urllib.request import urlopen import numpy as np import tensorflow as tf @@ -44,13 +44,13 @@ IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv" def main(): # If the training and test sets aren't stored locally, download them. 
if not os.path.exists(IRIS_TRAINING): - raw = urllib.urlopen(IRIS_TRAINING_URL).read() - with open(IRIS_TRAINING, "w") as f: + raw = urlopen(IRIS_TRAINING_URL).read() + with open(IRIS_TRAINING, "wb") as f: f.write(raw) if not os.path.exists(IRIS_TEST): - raw = urllib.urlopen(IRIS_TEST_URL).read() - with open(IRIS_TEST, "w") as f: + raw = urlopen(IRIS_TEST_URL).read() + with open(IRIS_TEST, "wb") as f: f.write(raw) # Load datasets. @@ -167,7 +167,7 @@ from __future__ import division from __future__ import print_function import os -import urllib +from six.moves.urllib.request import urlopen import tensorflow as tf import numpy as np @@ -184,13 +184,13 @@ them. ```python if not os.path.exists(IRIS_TRAINING): - raw = urllib.urlopen(IRIS_TRAINING_URL).read() - with open(IRIS_TRAINING,'w') as f: + raw = urlopen(IRIS_TRAINING_URL).read() + with open(IRIS_TRAINING,'wb') as f: f.write(raw) if not os.path.exists(IRIS_TEST): - raw = urllib.urlopen(IRIS_TEST_URL).read() - with open(IRIS_TEST,'w') as f: + raw = urlopen(IRIS_TEST_URL).read() + with open(IRIS_TEST,'wb') as f: f.write(raw) ``` -- GitLab From 91df2c942ebf4bd048edba055418467cae510431 Mon Sep 17 00:00:00 2001 From: Fred Reiss Date: Thu, 5 Oct 2017 14:22:26 -0700 Subject: [PATCH 080/909] Give accumulate_n op a gradient (version 2) (#13325) * Changed accumulate_n ==> accumulate_n_v2 and moved to contrib * Moving source files to contrib. * Better startup message. * Fixing up build * Removal of temporary code. * Reduce logging output * Fixing build issues. * CI sanity fixes. * Cleanup prior to PR * Cleanup * Cleanup. * Cleanup. * Cleanup. 
* Moved AccumulateNV2 to main build and added fallback to AddN for eager mode * Fixing CI issues --- tensorflow/contrib/framework/BUILD | 29 ++- .../framework/python/ops/accumulate_n_v2.py | 111 ++++++++++ .../python/ops/accumulate_n_v2_eager_test.py | 84 ++++++++ .../python/ops/accumulate_n_v2_test.py | 123 +++++++++++ tensorflow/core/BUILD | 1 + .../common_runtime/accumulate_n_optimizer.cc | 191 ++++++++++++++++++ tensorflow/core/ops/math_ops.cc | 32 +++ tensorflow/python/ops/hidden_ops.txt | 2 + 8 files changed, 571 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/framework/python/ops/accumulate_n_v2.py create mode 100644 tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py create mode 100644 tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py create mode 100644 tensorflow/core/common_runtime/accumulate_n_optimizer.cc diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index 6b0599ddd2..dd882acb8e 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -10,9 +10,8 @@ package(default_visibility = [ "//tensorflow:__subpackages__", ]) -load("//tensorflow:tensorflow.bzl", "cuda_py_test") -load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") load("//tensorflow:tensorflow.bzl", "tf_custom_op_library") load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") @@ -27,6 +26,7 @@ tf_custom_op_py_library( "python/framework/experimental.py", "python/framework/tensor_util.py", "python/ops/__init__.py", + "python/ops/accumulate_n_v2.py", "python/ops/arg_scope.py", "python/ops/audio_ops.py", "python/ops/checkpoint_ops.py", @@ -149,6 +149,31 @@ py_test( ], ) +py_test( + name = "accumulate_n_v2_test", + size = "small", + srcs = ["python/ops/accumulate_n_v2_test.py"], + srcs_version = 
"PY2AND3", + deps = [ + ":framework_py", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + ], +) + +py_test( + name = "accumulate_n_v2_eager_test", + size = "small", + srcs = ["python/ops/accumulate_n_v2_eager_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":framework_py", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python/eager:backprop", + ], +) + py_test( name = "ops_test", size = "small", diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py new file mode 100644 index 0000000000..a0667bd489 --- /dev/null +++ b/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py @@ -0,0 +1,111 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Ops that will eventually be folded into tensorflow/python/ops/math_ops.py +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops + + + +def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None): + """Returns the element-wise sum of a list of tensors. + + Optionally, pass `shape` and `tensor_dtype` for shape and type checking, + otherwise, these are inferred. + + `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not + wait for all of its inputs to be ready before beginning to sum. This can + save memory if inputs are ready at different times, since minimum temporary + storage is proportional to the output size rather than the inputs size. + + Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. + + For example: + + ```python + a = tf.constant([[1, 2], [3, 4]]) + b = tf.constant([[5, 0], [0, 6]]) + tf.accumulate_n_v2([a, b, a]) # [[7, 4], [6, 14]] + + # Explicitly pass shape and type + tf.accumulate_n_v2([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) + # [[7, 4], + # [6, 14]] + ``` + + Args: + inputs: A list of `Tensor` objects, each with same shape and type. + shape: Shape of elements of `inputs`. + tensor_dtype: The type of `inputs`. + name: A name for the operation (optional). + + Returns: + A `Tensor` of same shape and type as the elements of `inputs`. + + Raises: + ValueError: If `inputs` don't all have same shape and dtype or the shape + cannot be inferred. 
+ """ + _INPUTS_ERR_MSG = ValueError("inputs must be a list of at least one Tensor" + "with the same dtype and shape") + if not inputs or not isinstance(inputs, (list, tuple)): + raise _INPUTS_ERR_MSG + inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) + if not all(isinstance(x, ops.Tensor) for x in inputs): + raise _INPUTS_ERR_MSG + if not all(x.dtype == inputs[0].dtype for x in inputs): + raise _INPUTS_ERR_MSG + if shape is not None: + shape = tensor_shape.as_shape(shape) + else: + shape = tensor_shape.unknown_shape() + for input_tensor in inputs: + if isinstance(input_tensor, ops.Tensor): + shape = shape.merge_with(input_tensor.get_shape()) + + # tensor_dtype is for safety only; operator's output type computed in C++ + if tensor_dtype is not None and tensor_dtype != inputs[0].dtype: + raise TypeError("tensor_dtype is {}, but input is of type {}" + .format(tensor_dtype, inputs[0].dtype)) + + if len(inputs) == 1 and name is None: + return inputs[0] + elif len(inputs) == 1 and name is not None: + return array_ops.identity(inputs[0], name=name) + elif context.in_eager_mode(): + # TemporaryVariable not currently supported in eager mode; fall back + # onto AddN for now. + # TODO(frreiss) remove this once the lifetime of eager variables gets + # addressed + return math_ops.add_n(inputs, name=name) + else: + return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) + +# The following code should eventually be merged into +# tensorflow/python/ops/math_grad.py +@ops.RegisterGradient("AccumulateNV2") +def _AddNGrad(op, grad): + """Same as gradient for AddN. Copies the gradient to all inputs.""" + # Not broadcasting. 
+ return [grad] * len(op.inputs) + + diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py new file mode 100644 index 0000000000..8c618838bf --- /dev/null +++ b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py @@ -0,0 +1,84 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for new version of accumulate_n op that will eventually go into +`ops.math_ops`. + +These test cases specifically exercise the `eager` APIs.
They need to be in a +separate file from the remaining tests because eager mode is currently something +you can turn on but can't turn off for the lifetime of the current process.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.framework.python.ops import accumulate_n_v2 as av2 + +from tensorflow.python.eager import backprop +from tensorflow.python.eager import context as eager_context +from tensorflow.python.eager import tape + + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes as dtypes_lib +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.platform import test + + + +class AccumulateNV2EagerTest(test_util.TensorFlowTestCase): + """Tests of the new, differentiable version of accumulate_n""" + + def testMinimalEagerMode(self): + forty = constant_op.constant(40) + two = constant_op.constant(2) + answer = av2.accumulate_n_v2([forty, two]) + self.assertEqual(42, answer.numpy()) + + + def testFloat(self): + np.random.seed(12345) + x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] + tf_x = ops.convert_n_to_tensor(x) + with self.test_session(use_gpu=True): + self.assertAllClose(sum(x), av2.accumulate_n_v2(tf_x).numpy()) + self.assertAllClose(x[0] * 5, av2.accumulate_n_v2([tf_x[0]] * 5).numpy()) + + def testGrad(self): + np.random.seed(42) + num_inputs = 3 + input_vars = [ + resource_variable_ops.ResourceVariable(10.0 * np.random.random()) + for i in range(0, num_inputs) + ] + + def fn(first, second, third): + return av2.accumulate_n_v2([first, second, third]) + + grad_fn = backprop.gradients_function(fn) + grad = grad_fn(input_vars[0], input_vars[1], input_vars[2]) + 
self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1 + [elem.numpy() for elem in grad]) + + + +if __name__ == "__main__": + eager_context.enable_eager_execution() + test.main() + diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py new file mode 100644 index 0000000000..3386e849d5 --- /dev/null +++ b/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py @@ -0,0 +1,123 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for new version of accumulate_n op that will eventually go into +`ops.math_ops`.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.framework.python.ops import accumulate_n_v2 as av2 + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes as dtypes_lib +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import gradients +from tensorflow.python.ops import variables +from tensorflow.python.platform import googletest + + + +class AccumulateNV2Test(test_util.TensorFlowTestCase): + """Tests of the new, differentiable version of accumulate_n""" + + def testFloat(self): + np.random.seed(12345) + x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] + tf_x = ops.convert_n_to_tensor(x) + with self.test_session(use_gpu=True): + self.assertAllClose(sum(x), av2.accumulate_n_v2(tf_x).eval()) + self.assertAllClose(x[0] * 5, av2.accumulate_n_v2([tf_x[0]] * 5).eval()) + + def testInt(self): + np.random.seed(54321) + x = [np.random.randint(-128, 128, (5, 4, 3, 2, 1)) for _ in range(6)] + tf_x = ops.convert_n_to_tensor(x) + with self.test_session(use_gpu=True): + self.assertAllEqual(sum(x), av2.accumulate_n_v2(tf_x).eval()) + self.assertAllEqual(x[0] * 6, av2.accumulate_n_v2([tf_x[0]] * 6).eval()) + + def testGrad(self): + np.random.seed(42) + for num_inputs in range(1, 10): + with self.test_session(use_gpu=True) as sess: + input_vars = [ + variables.Variable(10.0 * np.random.random()) + for i in range(0, num_inputs) + ] + accum_n = av2.accumulate_n_v2(input_vars) + sess.run(variables.global_variables_initializer()) + accum_n_grad = gradients.gradients(accum_n, input_vars) + self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) 
= 1 + [g.eval() for g in accum_n_grad]) + + # The tests below used to be in a separate class under cwise_ops_test.py, + # which did not run in the default test target. + # Putting them here so that everything that exercises AccumulateNV2 is in + # one place and the default build runs all unit tests. + def testSimple(self): + with self.test_session(): + random_arrays = [ + np.random.rand(16, 16, 16, 16).astype(np.float32) for _ in range(20) + ] + random_tensors = [ + ops.convert_to_tensor( + x, dtype=dtypes_lib.float32) for x in random_arrays + ] + tf_val = av2.accumulate_n_v2(random_tensors) + np_val = random_arrays[0] + for random_array in random_arrays[1:]: + np_val += random_array + self.assertAllClose(np_val, tf_val.eval()) + + def testZeroArgs(self): + with self.test_session(): + with self.assertRaises(ValueError): + tf_val = av2.accumulate_n_v2([]) + tf_val.eval() + + def testWrongShape(self): + with self.test_session(): + with self.assertRaises(ValueError): + a = variables.Variable(0.2) + b = variables.Variable(0.1) + tf_val = av2.accumulate_n_v2([a,b], shape=[2,2]) # Should be shape=[] + + def testIncompatibleShapes(self): + with self.test_session(): + with self.assertRaises(ValueError): + a = variables.Variable(np.array([0.1,0.2])) + b = variables.Variable(np.array([[0.3],[0.4]])) + tf_val = av2.accumulate_n_v2([a,b]) + + def testWrongType(self): + with self.test_session(): + with self.assertRaises(TypeError): + a = variables.Variable(0.2, dtype=np.float32) + b = variables.Variable(0.1, dtype=np.float32) + tf_val = av2.accumulate_n_v2([a,b], tensor_dtype=np.int32) + + def testWrongTypeOneInput(self): + # Scenario that used to trigger a bug, even when testWrongType() worked + with self.test_session(): + with self.assertRaises(TypeError): + a = variables.Variable(0.2, dtype=np.float32) + tf_val = av2.accumulate_n_v2([a], tensor_dtype=np.int32) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 
aaede2a6bb..aff132134c 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1938,6 +1938,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ tf_cuda_library( name = "core_cpu_impl", srcs = [ + "common_runtime/accumulate_n_optimizer.cc", "common_runtime/allocator_retry.cc", "common_runtime/bfc_allocator.cc", "common_runtime/build_graph_options.cc", diff --git a/tensorflow/core/common_runtime/accumulate_n_optimizer.cc b/tensorflow/core/common_runtime/accumulate_n_optimizer.cc new file mode 100644 index 0000000000..81cd44870e --- /dev/null +++ b/tensorflow/core/common_runtime/accumulate_n_optimizer.cc @@ -0,0 +1,191 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + + +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/graph/node_builder.h" + + +namespace tensorflow { +namespace { + +Tensor make_zeros(const DataType& dtype, const TensorShapeProto& shape) { + Tensor tensor(dtype, TensorShape(shape)); + + // Conveniently, all numeric data types have 0x0 == zero. Otherwise we would + // need a giant switch statement here. + memset(const_cast<char*>(tensor.tensor_data().data()), 0, + tensor.tensor_data().size()); + + return tensor; +} + +// Replaces occurrences of the "AccumulateNV2" stub operator with a graph of +// lower-level ops.
 The graph is equivalent (modulo certain corner cases) +// to the semantics of the original accumulate_n() Python op in math_ops.py. +// Implementing the op with a rewrite allows this new variant of accumulate_n +// to be differentiable. +// +// The binary code that generates AccumulateNV2 stub ops is located in a +// dynamic library built out of tensorflow/contrib/framework. Ideally, this +// class would also be in contrib, but calls to REGISTER_OPTIMIZATION() from +// third-party libraries aren't currently supported. +class AccumulateNV2RemovePass : public GraphOptimizationPass { + public: + + Status Run(const GraphOptimizationPassOptions& options) override { + // TODO(freiss.oss@gmail.com): Substantial shared code with + // ParallelConcatRemovePass::Run(). Consider refactoring if someone makes + // a third similar rewrite. + if (options.graph == nullptr) { + // TODO(apassos) returning OK feels weird here as we can't do anything + // without a graph, but some tests require this. + return Status::OK(); + } + + Graph* g = options.graph->get(); + if (g == nullptr) { + return errors::Internal( + "AccumulateNV2 removal should happen before partitioning and a " + "graph should be available."); + } + + // Build up a todo list of ops to replace, *then* modify the graph + gtl::InlinedVector<Node*, 2> matches; + for (Node* n : g->op_nodes()) { + if (n->type_string() == "AccumulateNV2") { + matches.push_back(n); + } + } + for (Node* n : matches) { + TF_RETURN_IF_ERROR(rewriteNode(n, g)); + } + return Status::OK(); + } + + Status rewriteNode(Node* n, Graph* g) { + AttrSlice n_attrs = n->attrs(); + auto base_make_node = [n, g, &n_attrs](const string& op, + const string& name) { + NodeBuilder node_builder(name, op); + + // The pieces of AccumulateNV2 should all be on the same node.
+ node_builder.Device(n->requested_device()); + string colo; + if (GetNodeAttr(n_attrs, kColocationAttrName, &colo).ok()) { + node_builder.Attr(kColocationAttrName, colo); + } + return node_builder; + }; + auto make_node = [n, g, &n_attrs, &base_make_node](string op) { + return base_make_node( + op, g->NewName(strings::StrCat(n->name(), "/Internal"))); + }; + + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "T", &dtype)); + TensorShapeProto shape; + TF_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "shape", &shape)); + + std::vector<const Edge*> data_edges, control_edges; + for (const Edge* input_edge : n->in_edges()) { + if (input_edge->IsControlEdge()) { + control_edges.push_back(input_edge); + } else { + data_edges.push_back(input_edge); + } + } + + // Create the following ops to replace the AccumulateNV2 placeholder: + Node* create_accumulator = nullptr; // TemporaryVariable op + Node* initial_val = nullptr; // Const op + Node* initialize_accumulator = nullptr; // Assign op + std::vector<Node*> add_values_to_accumulator; // AssignAdd ops + Node* clean_up_accumulator = nullptr; // DestroyTemporaryVariable + + const string accumulator_name = + strings::StrCat(n->name(), "/Internal/Accumulator"); + TF_RETURN_IF_ERROR(make_node("TemporaryVariable") + .Attr("shape", shape) + .Attr("dtype", dtype) + .Attr("var_name", accumulator_name) + .Finalize(g, &create_accumulator)); + TF_RETURN_IF_ERROR(make_node("Const") + .Attr("value", make_zeros(dtype, shape)) + .Attr("dtype", dtype) + .Finalize(g, &initial_val)); + TF_RETURN_IF_ERROR(make_node("Assign") + .Attr("T", dtype) + .Input(create_accumulator) // ref: Ref(T) + .Input(initial_val) // value: T + .Finalize(g, &initialize_accumulator)); + for (int i = 0; i < data_edges.size(); ++i) { + Node* assignAdd; + TF_RETURN_IF_ERROR(make_node("AssignAdd") + .Attr("T", dtype) + .Attr("use_locking", true) + .Input(initialize_accumulator) // ref: Ref(T) + .Input(data_edges[i]->src(), + data_edges[i]->src_output()) // value: T + .Finalize(g, 
&assignAdd)); + + add_values_to_accumulator.push_back(assignAdd); + } + + // Note that we use the original placeholder op's name here + TF_RETURN_IF_ERROR(base_make_node("DestroyTemporaryVariable", n->name()) + .Attr("T", dtype) + .Attr("var_name", accumulator_name) + .Input(initialize_accumulator) + .Finalize(g, &clean_up_accumulator)); + + // Add edges to the graph to ensure that operations occur in the right + // order: + // 1. Do anything that had a control edge to the AccumulateNV2 placeholder + // 2. Initialize accumulator + // 3. Add input values to accumulator (already handled by data edges + // added above) + // 4. Reclaim the buffer that held the accumulator + // 5. Do anything that depended on the AccumulateNV2 placeholder + for (const Edge* control_edge : control_edges) { + g->AddControlEdge(control_edge->src(), initialize_accumulator); + } + + for (Node* assign_add : add_values_to_accumulator) { + g->AddControlEdge(assign_add, clean_up_accumulator); + } + + for (const Edge* out_edge : n->out_edges()) { + if (out_edge->IsControlEdge()) { + g->AddControlEdge(clean_up_accumulator, out_edge->dst()); + } else { + g->AddEdge(clean_up_accumulator, 0, out_edge->dst(), + out_edge->dst_input()); + } + } + + // Remove the original AccumulateNV2 placeholder op. + // This removal modifies the op and must happen after we have finished + // using its incoming/outgoing edge sets. + g->RemoveNode(n); + + return Status::OK(); + } +}; +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0, + AccumulateNV2RemovePass); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 015fd6e388..967b121a44 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -49,6 +49,38 @@ inputs: Must all be the same size and shape. 
// -------------------------------------------------------------------------- +// Note that the following operator is just a placeholder and has no +// associated kernel. The code in accumulate_n_optimizer.cc replaces +// this placeholder with a graph of operators that do have kernels. +// The Python code that generates instances of this op is currently in +// contrib/framework/python/ops/accumulate_n_v2.py +REGISTER_OP("AccumulateNV2") + .Input("inputs: N * T") + .Output("sum: T") + .Attr("N: int >= 1") + .Attr("T: numbertype") + .Attr("shape: shape") + .SetIsCommutative() + .SetIsAggregate() + .SetShapeFn(shape_inference::ExplicitShape) + .Doc(R"doc( +Returns the element-wise sum of a list of tensors. + +`tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not +wait for all of its inputs to be ready before beginning to sum. This can +save memory if inputs are ready at different times, since minimum temporary +storage is proportional to the output size rather than the inputs size. + +Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. + +Returns a `Tensor` of same shape and type as the elements of `inputs`. + +inputs: A list of `Tensor` objects, each with same shape and type. +shape: Shape of elements of `inputs`. 
+)doc"); + +// -------------------------------------------------------------------------- + REGISTER_OP("BatchMatMul") .Input("x: T") .Input("y: T") diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt index d27e867583..a12f750ec1 100644 --- a/tensorflow/python/ops/hidden_ops.txt +++ b/tensorflow/python/ops/hidden_ops.txt @@ -42,6 +42,7 @@ UniformCandidateSampler GenerateVocabRemapping LoadAndRemapMatrix + # control_flow_ops Switch Merge @@ -240,6 +241,7 @@ TensorSummaryV2 # math_ops Abs +AccumulateNV2 AddN All Any -- GitLab From ccc00be1b1e3ed9bbf1b47fec007ac3f06b8ce7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Fri, 6 Oct 2017 05:22:44 +0800 Subject: [PATCH 081/909] PREP: migrate ErfGrad to c++ side (#12872) * ENH: migrate ErfGrad * TST: add test case for real value * CLN: add semicolon * DOC: add comment * CLN: remove useless dependency * CLN: remove useless dependency in LgmmaGrad * TST: move lgamma test case * TST: add test case for Erf * TST: complex is unsupported for kernel * TST: complex64 -> float * ENH: use grad_scope * ENH: fix grad_scope for TanhGrad and SigmoidGrad * ENH: import M_PI --- tensorflow/cc/gradients/math_grad.cc | 32 ++++++++++--- tensorflow/cc/gradients/math_grad_test.cc | 58 ++++++++++++++++++----- 2 files changed, 71 insertions(+), 19 deletions(-) diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index ac288b1d83..68410812c5 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -13,6 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#define _USE_MATH_DEFINES +#include + #include "tensorflow/cc/ops/array_ops_internal.h" #include "tensorflow/cc/ops/math_ops_internal.h" #include "tensorflow/cc/ops/standard_ops.h" @@ -200,8 +203,8 @@ Status TanhGrad(const Scope& scope, const Operation& op, // evaluated. Scope grad_scope = scope.WithControlDependencies(grad); auto y = ConjugateHelper(grad_scope, op.output(0)); - grad_outputs->push_back(internal::TanhGrad(scope, y, grad)); - return scope.status(); + grad_outputs->push_back(internal::TanhGrad(grad_scope, y, grad)); + return grad_scope.status(); } REGISTER_GRADIENT_OP("Tanh", TanhGrad); @@ -256,8 +259,8 @@ Status SigmoidGrad(const Scope& scope, const Operation& op, // evaluated. Scope grad_scope = scope.WithControlDependencies(grad); auto y = ConjugateHelper(grad_scope, op.output(0)); - grad_outputs->push_back(internal::SigmoidGrad(scope, y, grad)); - return scope.status(); + grad_outputs->push_back(internal::SigmoidGrad(grad_scope, y, grad)); + return grad_scope.status(); } REGISTER_GRADIENT_OP("Sigmoid", SigmoidGrad); @@ -696,15 +699,32 @@ Status MeanGrad(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("Mean", MeanGrad); +Status ErfGrad(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + auto grad = grad_inputs[0]; + auto two_over_root_pi = Cast(scope, Const(scope, 2 / std::sqrt(M_PI)), + grad.type()); + Scope grad_scope = scope.WithControlDependencies(grad); + auto x = ConjugateHelper(grad_scope, op.input(0)); + // grad * 2/sqrt(pi) * exp(-x**2) + auto dx = Mul(grad_scope, + Mul(grad_scope, grad, two_over_root_pi), + Exp(grad_scope, Neg(grad_scope, Square(grad_scope, x)))); + grad_outputs->push_back(dx); + return grad_scope.status(); +} +REGISTER_GRADIENT_OP("Erf", ErfGrad); + Status LgammaGrad(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { 
auto grad = grad_inputs[0]; Scope grad_scope = scope.WithControlDependencies(grad); auto x = ConjugateHelper(grad_scope, op.input(0)); - auto dx = Mul(scope, grad, Digamma(scope, x)); + auto dx = Mul(grad_scope, grad, Digamma(grad_scope, x)); grad_outputs->push_back(dx); - return scope.status(); + return grad_scope.status(); } REGISTER_GRADIENT_OP("Lgamma", LgammaGrad); diff --git a/tensorflow/cc/gradients/math_grad_test.cc b/tensorflow/cc/gradients/math_grad_test.cc index a174f223ad..6313f41da5 100644 --- a/tensorflow/cc/gradients/math_grad_test.cc +++ b/tensorflow/cc/gradients/math_grad_test.cc @@ -64,7 +64,9 @@ class CWiseUnaryGradTest : public ::testing::Test { IMAG, CONJ, COMPLEX, - ANGLE + ANGLE, + LGAMMA, + ERF }; template @@ -168,6 +170,12 @@ class CWiseUnaryGradTest : public ::testing::Test { case ANGLE: y = Angle(scope_, x); break; + case LGAMMA: + y = Lgamma(scope_, x); + break; + case ERF: + y = Erf(scope_, x); + break; } float max_error; @@ -503,6 +511,42 @@ TEST_F(CWiseUnaryGradTest, Angle) { TestCWiseGrad(ANGLE, x_fn); } +TEST_F(CWiseUnaryGradTest, Lgamma) { + auto x_fn = [this](const int i) { + return RV({-3.5, -2.5, -1.5, 1.0, 2.0, 3.5}); + }; + TestCWiseGrad(LGAMMA, x_fn); +} + +TEST_F(CWiseUnaryGradTest, Lgamma_Complex) { + auto x_fn = [this](const int i) { + return CRV({{-3.5, 0.5}, {-1.5, -0.5}, {1.5, -1.0}, {3.5, 1.0}}); + }; + // TODO(kbsriram) + // Add test when the lgamma kernel supports complex numbers + if (false) { + TestCWiseGrad(LGAMMA, x_fn); + } +} + +TEST_F(CWiseUnaryGradTest, Erf) { + auto x_fn = [this](const int i) { + return RV({-1.2, -1.0, -0.5, 0.3, 0.5, 1.3}); + }; + TestCWiseGrad(ERF, x_fn); +} + +TEST_F(CWiseUnaryGradTest, Erf_Complex) { + auto x_fn = [this](const int i) { + return CRV({{-1.2, 0.5}, {-0.5, -0.5}, {0.5, 0.5}, {1.2, -0.5}}); + }; + // TODO(kbsriram) + // Add test when the erf kernel supports complex numbers + if (false) { + TestCWiseGrad(ERF, x_fn); + } +} + class MathGradTest : public ::testing::Test { 
protected: MathGradTest() : root_(Scope::NewRootScope().WithDevice("/cpu:0")) {} @@ -821,17 +865,5 @@ TEST_F(NaryGradTest, Minimum) { RunTest(x, x_init_value, y, shape); } -TEST_F(NaryGradTest, Lgamma) { - TensorShape shape({3, 2}); - auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); - auto y = Lgamma(scope_, x); - // Select values to avoid instability when computing finite differences. - // Ref: https://en.wikipedia.org/wiki/File:Gamma_plot.svg - Tensor x_init_value = - test::AsTensor({-3.5f, -2.5f, -1.5f, 1.0f, 2.0f, 3.5f}, {3, 2}); - RunTest(x, x_init_value, y, shape); - // TODO(suharshs): add test case for complex values -} - } // namespace } // namespace tensorflow -- GitLab From 5ad997498ac60d72f0f8f92a8d413b2398466aa7 Mon Sep 17 00:00:00 2001 From: Scott Kirkland Date: Thu, 5 Oct 2017 14:23:04 -0700 Subject: [PATCH 082/909] model_dir keyword argument repeated (#13494) In https://www.tensorflow.org/tutorials/wide#adding_regularization_to_prevent_overfitting, the code repeats the model_dir keyword argument, causing a syntax error if you try to run it (`SyntaxError: keyword argument repeated`). This remove the second occurrence of the model_dir param. --- tensorflow/docs_src/tutorials/wide.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/docs_src/tutorials/wide.md b/tensorflow/docs_src/tutorials/wide.md index 3055c54021..6292c1a01e 100644 --- a/tensorflow/docs_src/tutorials/wide.md +++ b/tensorflow/docs_src/tutorials/wide.md @@ -426,8 +426,7 @@ m = tf.estimator.LinearClassifier( optimizer=tf.train.FtrlOptimizer( learning_rate=0.1, l1_regularization_strength=1.0, - l2_regularization_strength=1.0), - model_dir=model_dir) + l2_regularization_strength=1.0)) ``` One important difference between L1 and L2 regularization is that L1 -- GitLab From 8b90d603a9359af361fc4dad7883f533dd365f32 Mon Sep 17 00:00:00 2001 From: Dhananjay Nakrani Date: Thu, 5 Oct 2017 14:53:21 -0700 Subject: [PATCH 083/909] Fix ASAN test. 
ASAN correctly complains about the overflow on `CT(Eigen::NumTraits::highest())`. This fixes the issue by providing correct CT for half and floats. PiperOrigin-RevId: 171212745 --- tensorflow/core/kernels/random_poisson_op.cc | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tensorflow/core/kernels/random_poisson_op.cc b/tensorflow/core/kernels/random_poisson_op.cc index 3f635dbbaf..bf1d83ec75 100644 --- a/tensorflow/core/kernels/random_poisson_op.cc +++ b/tensorflow/core/kernels/random_poisson_op.cc @@ -58,25 +58,8 @@ static constexpr int kReservedSamplesPerOutput = 256; typedef Eigen::ThreadPoolDevice CPUDevice; -// We will compute half-precision Poisson samples with float precision -// intermediate calculations. template struct PoissonComputeType { - typedef T ComputeType; -}; - -template <> -struct PoissonComputeType { - typedef float ComputeType; -}; - -template <> -struct PoissonComputeType { - typedef double ComputeType; -}; - -template <> -struct PoissonComputeType { typedef double ComputeType; }; -- GitLab From 0e71ecaf9512cd8a69af01ac85e5e1632171c651 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 5 Oct 2017 15:00:43 -0700 Subject: [PATCH 084/909] [TFXLA] Loops whose values are not consumed need no out edges. If there is no exit node then there is not need to add output edges to it. 
PiperOrigin-RevId: 171213900 --- .../tf2xla/functionalize_control_flow.cc | 27 +++-- .../tf2xla/functionalize_control_flow_test.cc | 102 ++++++++++++++++++ 2 files changed, 115 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc index 56d8bb4f2c..b9b2b4be27 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc @@ -402,10 +402,6 @@ Status FunctionalizeLoop(Graph* graph, Frame* frame, arg.exit = edge->dst(); } } - if (arg.exit == nullptr) { - return errors::InvalidArgument("Missing Exit successor to ", - arg.switch_node->name()); - } } } @@ -470,16 +466,19 @@ Status FunctionalizeLoop(Graph* graph, Frame* frame, } if (!arg.is_loop_invariant) { - std::vector edges(arg.exit->out_edges().begin(), - arg.exit->out_edges().end()); - for (const Edge* edge : edges) { - Node* dst = edge->dst(); - int dst_input = edge->dst_input(); - graph->RemoveEdge(edge); - - int src_output = - dst_input == Graph::kControlSlot ? Graph::kControlSlot : i; - graph->AddEdge(while_node, src_output, dst, dst_input); + // Add output edges if the output of the loop is consumed. + if (arg.exit != nullptr) { + std::vector edges(arg.exit->out_edges().begin(), + arg.exit->out_edges().end()); + for (const Edge* edge : edges) { + Node* dst = edge->dst(); + int dst_input = edge->dst_input(); + graph->RemoveEdge(edge); + + int src_output = + dst_input == Graph::kControlSlot ? 
Graph::kControlSlot : i; + graph->AddEdge(while_node, src_output, dst, dst_input); + } } } } diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc index 8f155ca85e..4acdf1a26d 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc @@ -297,6 +297,108 @@ TEST(FunctionalizeControlFlow, OneLoopVar) { } } +// Tests functionalizing OneLoopVar where the loop value is not used post the +// loop. +// Graph: +// x = array_ops.placeholder(dtypes.int32) +// control_flow_ops.while_loop(lambda i: i < 10, lambda i: i + 1, [x]) +TEST(FunctionalizeControlFlow, OneLoopVarWithoutExit) { + Graph graph(OpRegistry::Global()); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + + auto dummy = ops::Placeholder(scope.WithOpName("Dummy"), DT_INT32); + + auto source = ops::Placeholder(scope.WithOpName("source"), DT_INT32); + auto enter = + ops::internal::Enter(scope.WithOpName("while/Enter"), source, "aloop"); + auto merge = ops::Merge(scope.WithOpName("while/Merge"), + std::initializer_list{enter, dummy}); + auto ten = ops::Const( + scope.WithOpName("while/Less/y").WithControlDependencies(merge.output), + 10); + auto less = ops::Less(scope.WithOpName("while/Less"), merge.output, ten); + auto loop_cond = ops::LoopCond(scope.WithOpName("while/LoopCond"), less); + auto switch_ = + ops::Switch(scope.WithOpName("while/Switch"), merge.output, loop_cond); + auto identity = + ops::Identity(scope.WithOpName("while/Identity"), switch_.output_true); + auto one = ops::Const( + scope.WithOpName("while/add/y").WithControlDependencies(identity), 1); + auto add = ops::Add(scope.WithOpName("while/add"), identity, one); + auto next_iteration = + ops::NextIteration(scope.WithOpName("while/NextIteration"), add); + + // Remove the dummy node and add the loop backedge. 
+ scope.graph()->RemoveNode(dummy.node()); + scope.graph()->AddEdge(next_iteration.node(), 0, merge.output.node(), 1); + + TF_EXPECT_OK(scope.ToGraph(&graph)); + } + + FunctionLibraryDefinition library(OpRegistry::Global(), {}); + TF_ASSERT_OK(FunctionalizeControlFlow(&graph, &library)); + + GraphDef graph_def; + graph.ToGraphDef(&graph_def); + + NameAttrList cond_fn, body_fn; + TF_EXPECT_OK(FindWhileCondAndBody(graph_def, &cond_fn, &body_fn)); + + // Outer graph + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto source = ops::Placeholder(scope.WithOpName("source"), DT_INT32); + auto while_op = + ops::XlaWhile(scope.WithOpName("while/LoopCond"), + std::initializer_list{source}, cond_fn, body_fn); + GraphDef expected; + TF_EXPECT_OK(scope.ToGraphDef(&expected)); + TF_EXPECT_GRAPH_EQ(expected, graph_def); + } + + // Condition graph + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto arg = ops::_Arg(scope.WithOpName("_arg0"), DT_INT32, 0); + auto ten = ops::Const( + scope.WithOpName("while/Less/y").WithControlDependencies(arg), 10); + auto less = ops::Less(scope.WithOpName("while/Less"), arg, ten); + auto retval = ops::_Retval(scope.WithOpName("_retval0_RetVal"), less, 0); + + GraphDef expected; + TF_EXPECT_OK(scope.ToGraphDef(&expected)); + + InstantiationResultForTest result; + TF_EXPECT_OK(InstantiateFunctionForTest(cond_fn.name(), library, &result)); + + EXPECT_EQ(DataTypeVector{DT_INT32}, result.arg_types); + EXPECT_EQ(DataTypeVector{DT_BOOL}, result.ret_types); + TF_EXPECT_GRAPH_EQ(expected, result.gdef); + } + + // Body graph. 
+ { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto arg = ops::_Arg(scope.WithOpName("_arg0"), DT_INT32, 0); + auto identity = ops::Identity(scope.WithOpName("while/Identity"), arg); + auto one = ops::Const( + scope.WithOpName("while/add/y").WithControlDependencies(identity), 1); + auto add = ops::Add(scope.WithOpName("while/add"), identity, one); + auto retval = ops::_Retval(scope.WithOpName("_retval0_RetVal"), add, 0); + + GraphDef expected; + TF_EXPECT_OK(scope.ToGraphDef(&expected)); + + InstantiationResultForTest result; + TF_EXPECT_OK(InstantiateFunctionForTest(body_fn.name(), library, &result)); + + EXPECT_EQ(DataTypeVector{DT_INT32}, result.arg_types); + EXPECT_EQ(DataTypeVector{DT_INT32}, result.ret_types); + TF_EXPECT_GRAPH_EQ(expected, result.gdef); + } +} + // Graph: // x = array_ops.placeholder(dtypes.int32) // y = array_ops.placeholder(dtypes.int32) -- GitLab From fca432028808c3d17f74b2a80a2ab8f83a0a91b1 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 5 Oct 2017 15:28:51 -0700 Subject: [PATCH 085/909] Internal private header file with eager C struct definitions. 
PiperOrigin-RevId: 171218337 --- tensorflow/c/eager/BUILD | 22 ++++++- tensorflow/c/eager/c_api.cc | 59 +----------------- tensorflow/c/eager/c_api_internal.h | 96 +++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 61 deletions(-) create mode 100644 tensorflow/c/eager/c_api_internal.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 52945d3239..d39f229b42 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -10,13 +10,15 @@ load( tf_cuda_library( name = "c_api", - srcs = ["c_api.cc"], + srcs = [ + "c_api.cc", + "c_api_internal.h", + ], hdrs = ["c_api.h"], copts = tf_copts(), visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ - ":c_api_internal", "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ @@ -33,6 +35,21 @@ tf_cuda_library( }), ) +tf_cuda_library( + name = "c_api_internal", + hdrs = ["c_api_internal.h"], + deps = [ + ":c_api", + ":runtime", + "//tensorflow/c:c_api", + "//tensorflow/c:c_api_internal", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework_internal", + "//tensorflow/core:framework_lite", + "//tensorflow/core:lib_internal", + ], +) + tf_cc_test( name = "c_api_test", srcs = ["c_api_test.cc"], @@ -53,7 +70,6 @@ tf_cuda_library( visibility = ["//tensorflow:internal"], deps = select({ "//tensorflow:android": [ - ":c_api_internal", "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 801d730749..74f2e4f342 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/c/c_api.h" #include "tensorflow/c/c_api_internal.h" +#include "tensorflow/c/eager/c_api_internal.h" #include "tensorflow/c/eager/runtime.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_mgr.h" @@ -51,64 +52,6 @@ string DeviceName(tensorflow::Device* d) { } } // namespace -struct TFE_Context { - explicit TFE_Context(TF_Session* s) : session(s) {} - - // TFE_Context is an extension of TF_Session. And TF_Session needs a TF_Graph. - TF_Session* session; - tensorflow::Rendezvous* rendezvous; - - tensorflow::mutex functions_mu; - tensorflow::FunctionLibraryDefinition func_lib_def GUARDED_BY(functions_mu){ - tensorflow::OpRegistry::Global(), {}}; - - // One FunctionLibraryRuntime per device. - // func_libs[i] is the FunctionLibraryRuntime corresponding to - // session->devices[i]. - std::unique_ptr pflr; - - std::unordered_map - kernel_cache; - - tensorflow::FunctionLibraryRuntime* func_lib(tensorflow::Device* d) { - return pflr->GetFLR(d->name()); - } - - const std::vector& devices() { return session->devices; } -}; - -struct TFE_TensorHandle { - TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d) - : t(t), d(d) {} - - tensorflow::Tensor t; - // TODO(ashankar): d == nullptr iff local CPU - // This was expedient, but perhaps worth revisiting ('d' should always be a - // valid pointer?) - // This can be done if TFE_NewOp() and the TFE_TensorHandle constructors are - // provided with the appropriate TFE_Context. - // - // TODO(ashankar): Reference count TFE_Context to ensure that 'd' of a - // TFE_TensorHandle does not outlive the TFE_Context from which it came? - tensorflow::Device* d; -}; - -struct TFE_Op { - TFE_Op(TFE_Context* ctx, const char* op, const tensorflow::AttrTypeMap* t) - : ctx(ctx), name(op), attrs(op), attr_types(t), device(nullptr) {} - - bool const is_function() const { return attr_types == nullptr; } - - TFE_Context* ctx; // Must outlive the TFE_Op. 
- const string name; - tensorflow::AttrBuilder attrs; - const tensorflow::AttrTypeMap* attr_types; - std::vector inputs; - std::vector input_devices; - tensorflow::Device* device; -}; - extern "C" { TFE_Context* TFE_NewContext(const TF_SessionOptions* opts, TF_Status* status) { diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h new file mode 100644 index 0000000000..712526f170 --- /dev/null +++ b/tensorflow/c/eager/c_api_internal.h @@ -0,0 +1,96 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_C_EAGER_C_API_INTERNAL_H_ +#define TENSORFLOW_C_EAGER_C_API_INTERNAL_H_ + +#include "tensorflow/c/eager/c_api.h" + +#include +#include +#include +#include +#include + +#include "tensorflow/c/c_api.h" +#include "tensorflow/c/c_api_internal.h" +#include "tensorflow/c/eager/runtime.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" + +struct TFE_Context { + explicit TFE_Context(TF_Session* s) : session(s) {} + + // TFE_Context is an extension of TF_Session. And TF_Session needs a TF_Graph. + TF_Session* session; + tensorflow::Rendezvous* rendezvous; + + tensorflow::mutex functions_mu; + tensorflow::FunctionLibraryDefinition func_lib_def GUARDED_BY(functions_mu){ + tensorflow::OpRegistry::Global(), {}}; + + // One FunctionLibraryRuntime per device. + // func_libs[i] is the FunctionLibraryRuntime corresponding to + // session->devices[i]. + std::unique_ptr pflr; + + std::unordered_map + kernel_cache; + + tensorflow::FunctionLibraryRuntime* func_lib(tensorflow::Device* d) { + return pflr->GetFLR(d->name()); + } + + const std::vector& devices() { return session->devices; } +}; + +struct TFE_TensorHandle { + TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d) + : t(t), d(d) {} + + tensorflow::Tensor t; + // TODO(ashankar): d == nullptr iff local CPU + // This was expedient, but perhaps worth revisiting ('d' should always be a + // valid pointer?) + // This can be done if TFE_NewOp() and the TFE_TensorHandle constructors are + // provided with the appropriate TFE_Context. 
+ // + // TODO(ashankar): Reference count TFE_Context to ensure that 'd' of a + // TFE_TensorHandle does not outlive the TFE_Context from which it came? + tensorflow::Device* d; +}; + +struct TFE_Op { + TFE_Op(TFE_Context* ctx, const char* op, const tensorflow::AttrTypeMap* t) + : ctx(ctx), name(op), attrs(op), attr_types(t), device(nullptr) {} + + bool const is_function() const { return attr_types == nullptr; } + + TFE_Context* ctx; // Must outlive the TFE_Op. + const tensorflow::string name; + tensorflow::AttrBuilder attrs; + const tensorflow::AttrTypeMap* attr_types; + std::vector inputs; + std::vector input_devices; + tensorflow::Device* device; +}; + +#endif // TENSORFLOW_C_EAGER_C_API_INTERNAL_H_ -- GitLab From e11b9fd32eb5b8f1eb9b8a30dbb08fc1f83fc1dd Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Thu, 5 Oct 2017 15:42:09 -0700 Subject: [PATCH 086/909] [Grappler] Fix a bug with multiple-output nodes. TrySimplifyAndReshapeUses should return a tensor not a node. Added a regression test that would have failed without this CL. ArithmeticOptimizer would have redirected the second input of concat to Split rather than Split:1. 
PiperOrigin-RevId: 171220303 --- .../optimizers/arithmetic_optimizer.cc | 28 +++++++-------- .../optimizers/arithmetic_optimizer.h | 11 ++++-- .../optimizers/arithmetic_optimizer_test.cc | 34 +++++++++++++++++++ 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index ba4487b6fc..2d7cf3b182 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -274,7 +274,7 @@ static bool SimplyReordersData(const NodeDef& node) { return node.op() == "Transpose"; } -const NodeDef* ArithmeticOptimizer::TrySimplifyAndReplaceUses( +string ArithmeticOptimizer::TrySimplifyAndReplaceUses( const NodeDef* node, GraphDef* graph_def, NodeMap* node_map, std::vector* new_nodes) const { // Remove inverse transposes. @@ -288,7 +288,7 @@ const NodeDef* ArithmeticOptimizer::TrySimplifyAndReplaceUses( if (Int32ValuesFromNode(*node_perm, &node_perm_values) && Int32ValuesFromNode(*input_perm, &input_perm_values) && AreInversePermutations(node_perm_values, input_perm_values)) { - return node_map->GetNode(input->input(0)); + return input->input(0); } } } @@ -316,7 +316,7 @@ const NodeDef* ArithmeticOptimizer::TrySimplifyAndReplaceUses( reshape->set_input(0, input->input(0)); node_map->UpdateInput(reshape->name(), input->name(), input->input(0)); new_nodes->push_back(reshape); - return reshape; + return reshape->name(); } } @@ -409,14 +409,14 @@ const NodeDef* ArithmeticOptimizer::TrySimplifyAndReplaceUses( consumer_of_mul->set_input(0, mul->input(0)); node_map->UpdateInput(consumer_of_mul->name(), mul->name(), other->name()); - return conv; + return conv->name(); } } } } } - return nullptr; + return ""; } namespace { @@ -459,28 +459,28 @@ void ArithmeticOptimizer::SimplifyArithmeticOps( while (!nodes_to_simplify.Empty()) { const NodeDef* node = nodes_to_simplify.PopBack(); std::vector 
new_nodes; - const NodeDef* simplified_node = + const string simplified_tensor = TrySimplifyAndReplaceUses(node, optimized_graph, &node_map, &new_nodes); - if (!simplified_node) { + if (simplified_tensor.empty()) { continue; } - if (simplified_node->name() != node->name()) { + if (NodeName(simplified_tensor) != node->name()) { // When `node` is simplifed to another node rather than in-place, the - // consumers of `node` are redirected to `simplified_node`. Re-push the - // consumers into `nodes_to_simplify` for further optimizations. + // consumers of `node` are already redirected to `simplified_tensor`. + // Re-push the consumers into `nodes_to_simplify` for further + // optimizations. std::set consumers = node_map.GetOutputs(node->name()); for (NodeDef* consumer : consumers) { // Update `consumer`'s use of `node` to `input`'s operand. for (int i = 0; i < consumer->input_size(); ++i) { if (NodeName(consumer->input(i)) == node->name()) { - *consumer->mutable_input(i) = simplified_node->name(); + *consumer->mutable_input(i) = simplified_tensor; } } VLOG(2) << "Update input " << node->name() << " of " << consumer->name() - << " to " << simplified_node->name(); - node_map.UpdateInput(consumer->name(), node->name(), - simplified_node->name()); + << " to " << simplified_tensor; + node_map.UpdateInput(consumer->name(), node->name(), simplified_tensor); if (!nodes_to_simplify.Exists(consumer)) { nodes_to_simplify.PushBack(consumer); } diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 55757086cd..fc381ec907 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -45,8 +45,9 @@ class ArithmeticOptimizer : public GraphOptimizer { // transposes. 
void SimplifyArithmeticOps(GraphDef* optimized_graph) const; // Tries to simplify the expression that roots at `node` and replaces the uses - // of `node` to the simplified expression. Returns the simplified node or - // nullptr if no simplification is performed. + // of `node` to the simplified expression. Returns the name of the simplified + // tensor (e.g. "split:1") or an emtpy string if no simplification is + // performed. // // `node_map` stores the mapping from node names to NodeDef*, and will be // updated according to the rewrite. @@ -54,7 +55,11 @@ class ArithmeticOptimizer : public GraphOptimizer { // `new_nodes` will be populated with the new nodes this function creates and // updates. The caller can push these nodes into the simplification queue to // optimize them further. - const NodeDef* TrySimplifyAndReplaceUses( + // + // TODO(jingyue): This interface is not suitable for optimizing nodes with + // multiple output tensors. We should pass in a tensor name instead of a + // NodeDef. 
+ string TrySimplifyAndReplaceUses( const NodeDef* node, GraphDef* graph_def, NodeMap* node_map, std::vector* new_nodes) const; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index c81ed5a414..c8bca4282b 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -140,6 +140,40 @@ TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposes) { std::set({"inputs_shape", "inputs", "outputs"})); } +TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposesMultipleOutputs) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output inputs_shape = + ops::Const(s.WithOpName("inputs_shape"), {8, 9, 28, 28}, {4}); + Output inputs = ops::Placeholder(s.WithOpName("inputs"), DT_FLOAT, + ops::Placeholder::Shape({8, 12, 28, 28})); + OutputList split = ops::Split(s, ops::Const(s, 1), inputs, 3).output; + Output perm1 = ops::Const(s, {0, 2, 3, 1}, {4}); + Output perm2 = ops::Const(s, {0, 3, 1, 2}, {4}); + Output branch0 = split[0]; + Output branch1 = ops::Transpose(s, ops::Transpose(s, split[1], perm1), perm2); + Output branch2 = split[2]; + Output concat = ops::Concat(s, {branch0, branch1, branch2}, ops::Const(s, 1)); + Output outputs = ops::Identity(s.WithOpName("outputs"), concat); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); + + item.graph = output; + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + + for (const NodeDef& node : output.node()) { + if (node.op() == "Concat") { + EXPECT_EQ(node.input(0), "Split"); + EXPECT_EQ(node.input(1), "Split:1"); + EXPECT_EQ(node.input(2), "Split:2"); + } + } +} + TEST_F(ArithmeticOptimizerTest, NotRemoveTransposes) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output inputs_shape = -- 
GitLab From 95a7ea781025fe7509b09e9fcb23d02f35bcf2d7 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 5 Oct 2017 15:50:44 -0700 Subject: [PATCH 087/909] Automated g4 rollback of changelist 171084886 PiperOrigin-RevId: 171221629 --- tensorflow/compiler/tf2xla/kernels/BUILD | 5 +- .../kernels/gather_op_kernel_float_int32.cc | 3 - .../kernels/gather_op_kernel_float_int64.cc | 3 - .../index_ops_kernel_argmax_float_1d.cc | 3 - .../index_ops_kernel_argmax_float_2d.cc | 3 - tensorflow/compiler/xla/service/cpu/BUILD | 12 -- .../cpu/custom_call_target_registry.cc | 39 ---- .../service/cpu/custom_call_target_registry.h | 74 ------- .../xla/service/cpu/simple_orc_jit.cc | 195 ++++++++---------- tensorflow/compiler/xla/tests/BUILD | 3 +- .../compiler/xla/tests/custom_call_test.cc | 14 +- tensorflow/compiler/xla/xla.bzl | 8 + 12 files changed, 96 insertions(+), 266 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/cpu/custom_call_target_registry.cc delete mode 100644 tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 915c95e945..6a0c4fef75 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -5,6 +5,7 @@ package( ) load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load("//tensorflow/compiler/xla:xla.bzl", "export_dynamic_linkopts") tf_kernel_library( name = "xla_ops", @@ -154,7 +155,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//tensorflow/compiler/tf2xla:xla_local_runtime_context", - "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", "//tensorflow/core:framework_lite", "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/kernels:gather_functor_hdr", @@ -169,7 +169,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "//tensorflow/compiler/tf2xla:xla_local_runtime_context", - 
"//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", "//tensorflow/core:framework_lite", "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/kernels:gather_functor_hdr", @@ -183,7 +182,6 @@ cc_library( srcs = ["index_ops_kernel_argmax_float_1d.cc"], visibility = ["//visibility:public"], deps = [ - "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", "//tensorflow/core:framework_lite", "//third_party/eigen3", ], @@ -195,7 +193,6 @@ cc_library( srcs = ["index_ops_kernel_argmax_float_2d.cc"], visibility = ["//visibility:public"], deps = [ - "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", "//tensorflow/core:framework_lite", "//third_party/eigen3", ], diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int32.cc b/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int32.cc index 0b44e0c6f8..33b1b087d0 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int32.cc +++ b/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int32.cc @@ -17,7 +17,6 @@ limitations under the License. 
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h" -#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/gather_functor.h" #include "tensorflow/core/platform/dynamic_annotations.h" @@ -71,5 +70,3 @@ EIGEN_STRONG_INLINE void gather_float_int32_xla_impl(float* out, void** data) { extern "C" void TF_EXPORT gather_float_int32_xla_impl(float* out, void** data) { tensorflow::gather_float_int32_xla_impl(out, data); } - -REGISTER_CUSTOM_CALL_TARGET(gather_float_int32_xla_impl); diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int64.cc b/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int64.cc index d7c7a7bf2c..5e2d872ce0 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int64.cc +++ b/tensorflow/compiler/tf2xla/kernels/gather_op_kernel_float_int64.cc @@ -17,7 +17,6 @@ limitations under the License. 
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/compiler/tf2xla/xla_local_runtime_context.h" -#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/gather_functor.h" #include "tensorflow/core/platform/dynamic_annotations.h" @@ -71,5 +70,3 @@ EIGEN_STRONG_INLINE void gather_float_int64_xla_impl(float* out, void** data) { extern "C" void TF_EXPORT gather_float_int64_xla_impl(float* out, void** data) { tensorflow::gather_float_int64_xla_impl(out, data); } - -REGISTER_CUSTOM_CALL_TARGET(gather_float_int64_xla_impl); diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_1d.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_1d.cc index 47cf8c6675..afbd64ca50 100644 --- a/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_1d.cc +++ b/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_1d.cc @@ -16,7 +16,6 @@ limitations under the License. 
#define EIGEN_USE_THREADS #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/dynamic_annotations.h" #include "tensorflow/core/platform/macros.h" @@ -48,5 +47,3 @@ EIGEN_STRONG_INLINE void argmax_float_1d_xla_impl(void* out, void** data) { extern "C" void TF_EXPORT argmax_float_1d_xla_impl(void* out, void** data) { tensorflow::argmax_float_1d_xla_impl(out, data); } - -REGISTER_CUSTOM_CALL_TARGET(argmax_float_1d_xla_impl); diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_2d.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_2d.cc index 9b83392d8f..841ff2f4df 100644 --- a/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_2d.cc +++ b/tensorflow/compiler/tf2xla/kernels/index_ops_kernel_argmax_float_2d.cc @@ -16,7 +16,6 @@ limitations under the License. #define EIGEN_USE_THREADS #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/dynamic_annotations.h" #include "tensorflow/core/platform/macros.h" @@ -50,5 +49,3 @@ EIGEN_STRONG_INLINE void argmax_float_2d_xla_impl(void* out, void** data) { extern "C" void TF_EXPORT argmax_float_2d_xla_impl(void* out, void** data) { tensorflow::argmax_float_2d_xla_impl(out, data); } - -REGISTER_CUSTOM_CALL_TARGET(argmax_float_2d_xla_impl); diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 5d13b82427..fa6e5b2313 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -129,7 +129,6 @@ cc_library( ":cpu_runtime_avx", ":cpu_runtime_neon", ":cpu_runtime_sse4_1", - ":custom_call_target_registry", ":disassembler", ":runtime_conv2d", 
":runtime_matmul", @@ -675,17 +674,6 @@ cc_library( ], ) -cc_library( - name = "custom_call_target_registry", - srcs = [ - "custom_call_target_registry.cc", - ], - hdrs = [ - "custom_call_target_registry.h", - ], - visibility = ["//visibility:public"], -) - # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.cc b/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.cc deleted file mode 100644 index 5f5803874b..0000000000 --- a/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.cc +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" - -namespace xla { -namespace cpu { - -CustomCallTargetRegistry* CustomCallTargetRegistry::Global() { - static auto* registry = new CustomCallTargetRegistry; - return registry; -} - -void CustomCallTargetRegistry::Register(const std::string& symbol, - void* address) { - std::lock_guard lock(mu_); - registered_symbols_[symbol] = address; -} - -void* CustomCallTargetRegistry::Lookup(const std::string& symbol) const { - std::lock_guard lock(mu_); - auto it = registered_symbols_.find(symbol); - return it == registered_symbols_.end() ? 
nullptr : it->second; -} - -} // namespace cpu -} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h b/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h deleted file mode 100644 index 2994642356..0000000000 --- a/tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h +++ /dev/null @@ -1,74 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CUSTOM_CALL_TARGET_REGISTRY_H_ -#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CUSTOM_CALL_TARGET_REGISTRY_H_ - -// This file is depended on by kernels that have to build for mobile devices. -// For this reason, we avoid relying on TensorFlow and instead only use the -// standard C++ library. - -#include // NOLINT -#include -#include - -namespace xla { -namespace cpu { - -// The CPU JIT compiler uses this registry to resolve symbolic CustomCall -// targets; so when using the CPU JIT, CustomCall targets need to be registered -// here with the symbol name used in the CustomCall. -// -// The XLA AOT compiler links using a standard offline linker; so when compiling -// in AOT mode, you *also* need to make sure the name of the callee (presumably -// implemented in C++) matches up with the symbolic name used in the CustomCall. 
-// -// We maintain the registry in both the JIT and the AOT cases for simplicity, -// but we only use it when running in JIT mode. -class CustomCallTargetRegistry { - public: - static CustomCallTargetRegistry* Global(); - - void Register(const std::string& symbol, void* address); - void* Lookup(const std::string& symbol) const; - - private: - std::unordered_map registered_symbols_; - mutable std::mutex mu_; -}; - -class RegisterCustomCallTarget { - public: - explicit RegisterCustomCallTarget(const std::string& name, void* address) { - CustomCallTargetRegistry::Global()->Register(name, address); - } -}; - -#define REGISTER_CUSTOM_CALL_CONCAT(a, b) a##b - -#define REGISTER_CUSTOM_CALL_TARGET_WITH_SYM_HELPER(symbol, address, counter) \ - static ::xla::cpu::RegisterCustomCallTarget REGISTER_CUSTOM_CALL_CONCAT( \ - custom_call_target_register, counter)(symbol, \ - reinterpret_cast(address)) - -#define REGISTER_CUSTOM_CALL_TARGET_WITH_SYM(symbol, address) \ - REGISTER_CUSTOM_CALL_TARGET_WITH_SYM_HELPER(symbol, address, __COUNTER__) - -#define REGISTER_CUSTOM_CALL_TARGET(function) \ - REGISTER_CUSTOM_CALL_TARGET_WITH_SYM(#function, function) - -} // namespace cpu -} // namespace xla - -#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CUSTOM_CALL_TARGET_REGISTRY_H_ diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index 0711c9de27..c3c11df090 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -31,7 +31,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h" #include "tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h" #include "tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h" -#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d.h" #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h" @@ -43,10 +42,90 @@ namespace xla { namespace cpu { namespace { +// Converts a symbol 'name' into the form expected by dlsym(). +std::string CanonicalizeSymbol(const std::string& name) { +#if defined(__APPLE__) + // On Mac OS X, dlsym() expects names not to be prefixed with a leading + // underscore. + if (!name.empty() && name.front() == '_') { + return name.substr(1); + } +#endif + return name; +} + +class JITSymbolTable { + public: + JITSymbolTable() { Populate(); } + + void* Lookup(llvm::StringRef jit_symbol_name) const { + auto it = jit_symbol_table_.find(jit_symbol_name); + return it == jit_symbol_table_.end() ? nullptr : it->getValue(); + } + + static bool MustBeInTable(llvm::StringRef name) { + // In particular, names starting with + // runtime::kXlaCpuRuntimeSymbolNamePrefix should not be dlsym'ed. + return name.startswith(runtime::kXlaCpuRuntimeSymbolNamePrefix); + } + + private: + void AddJITSymbolToTable(llvm::StringRef jit_symbol_name, + llvm::StringRef cpp_symbol_name, + void* jit_symbol_value) { + // The JIT symbol name and the C++ symbol name (with an extern "C" linkage) + // need to match, otherwise AOT links will fail. 
+ CHECK(jit_symbol_name == cpp_symbol_name); + CHECK(jit_symbol_table_.insert({jit_symbol_name, jit_symbol_value}).second); + } + + void Populate() { +#define ADD_JIT_SYMBOL_TO_TABLE(base_name) \ + do { \ + AddJITSymbolToTable( \ + xla::cpu::runtime::k##base_name##SymbolName, \ + "__xla_cpu_runtime_" #base_name, \ + reinterpret_cast(__xla_cpu_runtime_##base_name)); \ + } while (false) + + ADD_JIT_SYMBOL_TO_TABLE(AcquireInfeedBufferForDequeue); + ADD_JIT_SYMBOL_TO_TABLE(ReleaseInfeedBufferAfterDequeue); + ADD_JIT_SYMBOL_TO_TABLE(AcquireOutfeedBufferForPopulation); + ADD_JIT_SYMBOL_TO_TABLE(ReleaseOutfeedBufferAfterPopulation); + ADD_JIT_SYMBOL_TO_TABLE(ExpV8F32AVX); + ADD_JIT_SYMBOL_TO_TABLE(LogV8F32AVX); + ADD_JIT_SYMBOL_TO_TABLE(ExpV4F32SSE); + ADD_JIT_SYMBOL_TO_TABLE(LogV4F32SSE); + ADD_JIT_SYMBOL_TO_TABLE(ExpV4F32NEON); + ADD_JIT_SYMBOL_TO_TABLE(LogV4F32NEON); + ADD_JIT_SYMBOL_TO_TABLE(EigenConvF32); + ADD_JIT_SYMBOL_TO_TABLE(EigenMatMulF32); + ADD_JIT_SYMBOL_TO_TABLE(EigenMatMulF64); + ADD_JIT_SYMBOL_TO_TABLE(EigenSingleThreadedConvF32); + ADD_JIT_SYMBOL_TO_TABLE(EigenSingleThreadedMatMulF32); + ADD_JIT_SYMBOL_TO_TABLE(EigenSingleThreadedMatMulF64); + +#undef ADD_JIT_SYMBOL_TO_TABLE + } + + llvm::StringMap jit_symbol_table_; +}; + +const JITSymbolTable& GetJITSymbolTable() { + static JITSymbolTable* symbol_table = new JITSymbolTable; + return *symbol_table; +} + // A simple SymbolResolver that delegates to the host dynamic linker. struct SimpleResolver : public llvm::JITSymbolResolver { llvm::JITSymbol findSymbol(const std::string& name) override { - void* func_addr = CustomCallTargetRegistry::Global()->Lookup(name); + std::string canonical_name = CanonicalizeSymbol(name); + const JITSymbolTable& jit_symbol_table = GetJITSymbolTable(); + + void* func_addr = JITSymbolTable::MustBeInTable(canonical_name) + ? 
jit_symbol_table.Lookup(canonical_name) + : dlsym(RTLD_DEFAULT, canonical_name.c_str()); + if (func_addr == nullptr) { return nullptr; } @@ -159,117 +238,5 @@ llvm::JITSymbol SimpleOrcJIT::FindSymbol(const std::string& name) { return nullptr; } -namespace { -// Register some known symbols with the CustomCallTargetRegistry. -bool RegisterKnownJITSymbols() { - CustomCallTargetRegistry* registry = CustomCallTargetRegistry::Global(); - -#define REGISTER_CPU_RUNTIME_SYMBOL(base_name) \ - do { \ - auto* function_address = \ - reinterpret_cast(__xla_cpu_runtime_##base_name); \ - registry->Register(xla::cpu::runtime::k##base_name##SymbolName, \ - function_address); \ - CHECK_EQ( \ - tensorflow::StringPiece(xla::cpu::runtime::k##base_name##SymbolName), \ - "__xla_cpu_runtime_" #base_name); \ - } while (false) - - REGISTER_CPU_RUNTIME_SYMBOL(AcquireInfeedBufferForDequeue); - REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue); - REGISTER_CPU_RUNTIME_SYMBOL(AcquireOutfeedBufferForPopulation); - REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation); - REGISTER_CPU_RUNTIME_SYMBOL(ExpV8F32AVX); - REGISTER_CPU_RUNTIME_SYMBOL(LogV8F32AVX); - REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32SSE); - REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32SSE); - REGISTER_CPU_RUNTIME_SYMBOL(ExpV4F32NEON); - REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32NEON); - REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF32); - REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32); - REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64); - REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); - REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); - REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); - -#undef REGISTER_CPU_RUNTIME_SYMBOL - -#define REGISTER_LIBM_SYMBOL(name) \ - do { \ - /* Register both the F32 and F64 variants of the libm symbol. 
*/ \ - registry->Register(#name "f", reinterpret_cast(name##f)); \ - registry->Register(#name, reinterpret_cast(name)); \ - } while (false) - - REGISTER_LIBM_SYMBOL(acos); - REGISTER_LIBM_SYMBOL(acosh); - REGISTER_LIBM_SYMBOL(asin); - REGISTER_LIBM_SYMBOL(asinh); - REGISTER_LIBM_SYMBOL(atan); - REGISTER_LIBM_SYMBOL(atan2); - REGISTER_LIBM_SYMBOL(atanh); - REGISTER_LIBM_SYMBOL(cbrt); - REGISTER_LIBM_SYMBOL(ceil); - REGISTER_LIBM_SYMBOL(copysign); - REGISTER_LIBM_SYMBOL(cos); - REGISTER_LIBM_SYMBOL(cosh); - REGISTER_LIBM_SYMBOL(erf); - REGISTER_LIBM_SYMBOL(erfc); - REGISTER_LIBM_SYMBOL(exp); - REGISTER_LIBM_SYMBOL(exp2); - REGISTER_LIBM_SYMBOL(expm1); - REGISTER_LIBM_SYMBOL(fabs); - REGISTER_LIBM_SYMBOL(fdim); - REGISTER_LIBM_SYMBOL(floor); - REGISTER_LIBM_SYMBOL(fma); - REGISTER_LIBM_SYMBOL(fmax); - REGISTER_LIBM_SYMBOL(fmin); - REGISTER_LIBM_SYMBOL(fmod); - REGISTER_LIBM_SYMBOL(frexp); - REGISTER_LIBM_SYMBOL(hypot); - REGISTER_LIBM_SYMBOL(ilogb); - REGISTER_LIBM_SYMBOL(ldexp); - REGISTER_LIBM_SYMBOL(lgamma); - REGISTER_LIBM_SYMBOL(llrint); - REGISTER_LIBM_SYMBOL(llround); - REGISTER_LIBM_SYMBOL(log); - REGISTER_LIBM_SYMBOL(log10); - REGISTER_LIBM_SYMBOL(log1p); - REGISTER_LIBM_SYMBOL(log2); - REGISTER_LIBM_SYMBOL(logb); - REGISTER_LIBM_SYMBOL(lrint); - REGISTER_LIBM_SYMBOL(lround); - REGISTER_LIBM_SYMBOL(modf); - REGISTER_LIBM_SYMBOL(nan); - REGISTER_LIBM_SYMBOL(nearbyint); - REGISTER_LIBM_SYMBOL(nextafter); - REGISTER_LIBM_SYMBOL(nexttoward); - REGISTER_LIBM_SYMBOL(pow); - REGISTER_LIBM_SYMBOL(remainder); - REGISTER_LIBM_SYMBOL(remquo); - REGISTER_LIBM_SYMBOL(rint); - REGISTER_LIBM_SYMBOL(round); - REGISTER_LIBM_SYMBOL(scalbln); - REGISTER_LIBM_SYMBOL(scalbn); - REGISTER_LIBM_SYMBOL(sin); - REGISTER_LIBM_SYMBOL(sincos); - REGISTER_LIBM_SYMBOL(sinh); - REGISTER_LIBM_SYMBOL(sqrt); - REGISTER_LIBM_SYMBOL(tan); - REGISTER_LIBM_SYMBOL(tanh); - REGISTER_LIBM_SYMBOL(tgamma); - REGISTER_LIBM_SYMBOL(trunc); - -#undef REGISTER_LIBM_SYMBOL - - registry->Register("memcpy", 
reinterpret_cast(memcpy)); - registry->Register("memmove", reinterpret_cast(memmove)); - registry->Register("memset", reinterpret_cast(memset)); - return true; -} - -bool unused = RegisterKnownJITSymbols(); -} // namespace - } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 84bebd4708..e45b839afd 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -23,6 +23,7 @@ filegroup( ]), ) +load("//tensorflow/compiler/xla:xla.bzl", "export_dynamic_linkopts") load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test") load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test_library") load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_suites") @@ -980,13 +981,13 @@ xla_test( xla_test( name = "custom_call_test", srcs = ["custom_call_test.cc"], + linkopts = export_dynamic_linkopts, deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/service:hlo", - "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc index 74f73a1ddc..342478bc74 100644 --- a/tensorflow/compiler/xla/tests/custom_call_test.cc +++ b/tensorflow/compiler/xla/tests/custom_call_test.cc @@ -18,7 +18,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/ptr_util.h" -#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -32,19 +31,19 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/test.h" -namespace { -void R0F32Add2(float* out, float** in) { + +extern "C" void TF_EXPORT R0F32Add2(float* out, float** in) { TF_ANNOTATE_MEMORY_IS_INITIALIZED(in, sizeof(float*)); *out = **in + 2.0f; } -void R2F32ReduceSum(float* out, float** in) { +extern "C" void TF_EXPORT R2F32ReduceSum(float* out, float** in) { TF_ANNOTATE_MEMORY_IS_INITIALIZED(in, sizeof(float) * 4); float* array = in[0]; *out = array[0] + array[1] + array[2] + array[3]; } -void Add1ToValues(float* out, float** in) { +extern "C" void TF_EXPORT Add1ToValues(float* out, float** in) { TF_ANNOTATE_MEMORY_IS_INITIALIZED(in, sizeof(float) * 4); float* array = in[0]; out[0] = array[0] + 1; @@ -52,11 +51,6 @@ void Add1ToValues(float* out, float** in) { out[2] = array[2] + 1; out[3] = array[3] + 1; } -} // namespace - -REGISTER_CUSTOM_CALL_TARGET(R0F32Add2); -REGISTER_CUSTOM_CALL_TARGET(R2F32ReduceSum); -REGISTER_CUSTOM_CALL_TARGET(Add1ToValues); namespace xla { namespace { diff --git a/tensorflow/compiler/xla/xla.bzl b/tensorflow/compiler/xla/xla.bzl index 3fa5bcc1df..22e70ec97a 100644 --- a/tensorflow/compiler/xla/xla.bzl +++ b/tensorflow/compiler/xla/xla.bzl @@ -17,3 +17,11 @@ def xla_proto_library(name, srcs=[], deps=[], visibility=None, testonly=0): protoc="@protobuf_archive//:protoc", testonly=testonly, visibility=visibility,) + +# Flags required for modules that export symbols that are to be called by the +# XLA CustomCall operator. 
CustomCall must be able to find symbols with dlsym(), +# which on Linux requires we link with --export-dynamic. +export_dynamic_linkopts = select({ + "//tensorflow:darwin": [], + "//conditions:default": ["-Wl,--export-dynamic"], +}) -- GitLab From e4aa9dc317773ff66d85ac422b83e8952d4610b5 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 5 Oct 2017 15:53:25 -0700 Subject: [PATCH 088/909] Start of work towards ordering access to resources in tfe.defun/graph_callable. Still missing handling control flow and pessimistic alias analysis. PiperOrigin-RevId: 171221946 --- tensorflow/python/eager/BUILD | 1 - tensorflow/python/eager/function.py | 28 +++++++++++++++++-- tensorflow/python/eager/graph_callable.py | 14 ++++------ .../python/eager/graph_callable_test.py | 13 +++++++++ 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 76d4f37e9a..963eaf0742 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -308,7 +308,6 @@ py_library( "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", - "//tensorflow/python:graph_to_function_def", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:util", "//tensorflow/python:variable_scope", diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 6ffc914f73..8a1936b3fe 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -47,6 +47,28 @@ _scoped_captures = threading.local() _scoped_captures.tensors = None +def make_function_def(graph, operations, inputs, outputs): + """Makes function def where accesses to resources are serialized.""" + last_op_using_resource_tensor = {} + + # TODO(apassos) probably control flow has to be handled delicately here as in + # if a resource is accessed inside a control flow context we need the control + # dependency to point to something outside the context which 
is guaranteed to + # happen after the access. + # + # TODO(apassos) this should do some form of alias analysis as ops which + # forward the resources such as Identity and Switch can cause serialization to + # fail. + for op in operations: + for t in op.inputs: + if t.dtype == dtypes.resource: + if t.name in last_op_using_resource_tensor: + op._add_control_input(last_op_using_resource_tensor[t.name]) # pylint: disable=protected-access + last_op_using_resource_tensor[t.name] = op + return graph_to_function_def.graph_to_function_def( + graph, operations, inputs, outputs) + + @contextlib.contextmanager def capture_tensors(captures): old = _scoped_captures.__dict__.get("tensors", None) @@ -217,14 +239,14 @@ class _GraphModeFunction(object): grad_ys=self._out_grad_placeholders) shapes = [x.shape for x in in_gradients if x is not None] captures = list(sorted(c.captured_tensors, key=lambda x: x.name)) - forward_function_def = graph_to_function_def.graph_to_function_def( + forward_function_def = make_function_def( self._graph, self._ops, self._input_placeholders, filtered_outputs + captures) self._forward_fdef = _DefinedFunction(forward_function_def) _register_with_name(_forward_name(self._func_name), forward_function_def) backward_outputs = [x for x in in_gradients if x is not None] all_inputs = self._out_grad_placeholders + captures - backward_function_def = graph_to_function_def.graph_to_function_def( + backward_function_def = make_function_def( self._graph, [x.op for x in self._out_grad_placeholders ] + list(sorted(c.known_ops, key=lambda x: x.name)), all_inputs, backward_outputs) @@ -386,7 +408,7 @@ def _defun_internal(name, func, args, kwds): all_inputs = flat_inputs + list(extra_placeholders) func_def_outputs = [x for x in outputs_list if x is not None] - inference_function_def = graph_to_function_def.graph_to_function_def( + inference_function_def = make_function_def( tmp_graph, tmp_graph.get_operations(), all_inputs, func_def_outputs) # Register any other functions 
defined in the graph # TODO(ashankar): Oh lord, forgive me for this lint travesty. diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index 5933da7865..64d1659993 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -27,7 +27,6 @@ from tensorflow.python.eager import function from tensorflow.python.eager import tape from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors -from tensorflow.python.framework import graph_to_function_def from tensorflow.python.framework import ops as tf_ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import resource_variable_ops @@ -186,11 +185,10 @@ class _VariableCapturingScope(object): shared_name=name, shape=shape, dtype=dtype) if initializer is None: initializer = _default_initializer(name, shape, dtype) - with tf_ops.control_dependencies( - [resource_variable_ops.assign_variable_op( - graph_mode_resource, initializer(shape, dtype))]): - handle = array_ops.identity(v.variable.handle) - return _VariableFromResource(handle, dtype, name, shape=v.shape) + resource_variable_ops.assign_variable_op( + graph_mode_resource, initializer(shape, dtype)) + return _VariableFromResource( + graph_mode_resource, dtype, name, shape=v.shape) scope = variable_scope.get_variable_scope() with variable_scope.variable_scope(scope, custom_getter=_custom_getter): @@ -357,7 +355,7 @@ def _graph_callable_internal(func, shape_and_dtypes): all_inputs = variable_placeholders + placeholder_inputs func_def_outputs = [x for x in outputs_list if isinstance(x, tf_ops.Tensor)] - initializer_function_def = graph_to_function_def.graph_to_function_def( + initializer_function_def = function.make_function_def( tmp_graph, initializing_operations, placeholder_inputs, @@ -381,7 +379,7 @@ def _graph_callable_internal(func, shape_and_dtypes): capture_func_def_outputs = [ x for x in captured_outlist if isinstance(x, 
tf_ops.Tensor)] - captured_function_def = graph_to_function_def.graph_to_function_def( + captured_function_def = function.make_function_def( tmp_graph, capturing_operations, all_inputs, diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py index cee6adec04..4ad8f1f36e 100644 --- a/tensorflow/python/eager/graph_callable_test.py +++ b/tensorflow/python/eager/graph_callable_test.py @@ -100,6 +100,19 @@ class GraphCallableTest(test.TestCase): constant_op.constant([2.], dtype=dtypes.float32)).numpy()) + def testUpdatesAreOrdered(self): + + @graph_callable.graph_callable( + [graph_callable.ShapeAndDtype(shape=(), dtype=dtypes.float32)]) + def my_function(x): + v = variable_scope.get_variable( + "v", initializer=init_ops.zeros_initializer(), shape=()) + v.assign(x + 1) + v.assign(v * x) + return v.read_value() + + self.assertEqual(my_function(constant_op.constant(2.0)).numpy(), 6.0) + def testEmptyInitializer(self): @graph_callable.graph_callable( -- GitLab From f5ac1f40c96e3d41464ce39d18d9f97b9acfadc7 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Thu, 5 Oct 2017 16:18:33 -0700 Subject: [PATCH 089/909] Fixed the training_test on gpu-py3. 
PiperOrigin-RevId: 171225190 --- tensorflow/python/estimator/training_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/estimator/training_test.py b/tensorflow/python/estimator/training_test.py index 8c00ebddf3..d88ca2c925 100644 --- a/tensorflow/python/estimator/training_test.py +++ b/tensorflow/python/estimator/training_test.py @@ -705,7 +705,7 @@ class TrainingExecutorRunMasterTest(test.TestCase): mock_est = test.mock.Mock(spec=estimator_lib.Estimator) mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123} mock_est.config = self._run_config - mock_train_spec = test.mock.Mock(spec=training.TrainSpec) + mock_train_spec = test.mock.Mock(spec=training.TrainSpec, max_steps=123) mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[]) executor = training._TrainingExecutor(mock_est, mock_train_spec, @@ -750,7 +750,7 @@ class TrainingExecutorRunMasterTest(test.TestCase): mock_est = test.mock.Mock(spec=estimator_lib.Estimator) mock_est.evaluate = lambda *args, **kw: {ops.GraphKeys.GLOBAL_STEP: 123} mock_est.config = self._run_config - mock_train_spec = test.mock.Mock(spec=training.TrainSpec) + mock_train_spec = test.mock.Mock(spec=training.TrainSpec, max_steps=123) mock_eval_spec = test.mock.Mock(spec=training.EvalSpec, exporters=[]) executor = training._TrainingExecutor(mock_est, mock_train_spec, -- GitLab From 073d90578904aa00dee34e27d9cc6bac68af2c47 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 5 Oct 2017 16:34:53 -0700 Subject: [PATCH 090/909] Respect container context when creating ResourceVariables in Eager mode. 
PiperOrigin-RevId: 171227139 --- .../kernel_tests/resource_variable_ops_test.py | 11 +++++++++++ tensorflow/python/ops/resource_variable_ops.py | 12 ++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 17ecb6faf5..8cf8286ed1 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -411,6 +411,17 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): # Test operations self.assertAllEqual((v * 2).numpy(), (v + v).numpy()) + def testContainerEager(self): + with context.eager_mode(): + v1 = resource_variable_ops.ResourceVariable(initial_value=lambda: 1, + name="same") + with ops.container("different"): + v2 = resource_variable_ops.ResourceVariable(initial_value=lambda: 0, + name="same") + v2.assign(2) + self.assertEqual(1, v1.read_value().numpy()) + self.assertEqual(2, v2.read_value().numpy()) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 4ef9b05d51..cbfa141256 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -38,9 +38,11 @@ from tensorflow.python.ops.gen_resource_variable_ops import * from tensorflow.python.util import compat -def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode, - container=None): +def _eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode): """Creates a variable handle with information to do shape inference.""" + container = ops.get_default_graph()._container # pylint: disable=protected-access + if container is None: + container = "" handle = gen_resource_variable_ops.var_handle_op(shape=shape, dtype=dtype, shared_name=shared_name, name=name, @@ -305,8 +307,7 @@ class 
ResourceVariable(variables.Variable): dtype=initial_value.dtype.base_dtype, shared_name=handle_name, name=name, - graph_mode=False, - container="") + graph_mode=False) self._handle_device = ( self._handle.device if self._in_graph_mode else context.get_default_context().device_name) @@ -332,8 +333,7 @@ class ResourceVariable(variables.Variable): dtype=initial_value.dtype.base_dtype, shared_name=handle_name, name=name, - graph_mode=self._in_graph_mode, - container="") + graph_mode=self._in_graph_mode) self._handle_device = (self._handle.device if self._in_graph_mode else context.get_default_context().device_name) self._graph_shape = initial_value.get_shape() -- GitLab From be2b3dcbb6f17d472fa60553ab149f4472b27643 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 5 Oct 2017 17:10:00 -0700 Subject: [PATCH 091/909] Build tests only by default for ci_parameterized_build.sh PiperOrigin-RevId: 171231427 --- tensorflow/tools/ci_build/ci_parameterized_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index 7a1479c150..f640f07585 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -129,7 +129,7 @@ BAZEL_CMD="bazel test" BAZEL_BUILD_ONLY_CMD="bazel build" BAZEL_CLEAN_CMD="bazel clean" -DEFAULT_BAZEL_CONFIGS="--config=gcp --config=hdfs" +DEFAULT_BAZEL_CONFIGS="--config=gcp --config=hdfs --build_tests_only" PIP_CMD="${CI_BUILD_DIR}/builds/pip.sh" PIP_TEST_TUTORIALS_FLAG="--test_tutorials" -- GitLab From d6513c8149d5b69faa250949c6bec6c796c553e8 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 5 Oct 2017 17:41:09 -0700 Subject: [PATCH 092/909] Automated g4 rollback of changelist 171231427 PiperOrigin-RevId: 171234659 --- tensorflow/tools/ci_build/ci_parameterized_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index f640f07585..7a1479c150 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -129,7 +129,7 @@ BAZEL_CMD="bazel test" BAZEL_BUILD_ONLY_CMD="bazel build" BAZEL_CLEAN_CMD="bazel clean" -DEFAULT_BAZEL_CONFIGS="--config=gcp --config=hdfs --build_tests_only" +DEFAULT_BAZEL_CONFIGS="--config=gcp --config=hdfs" PIP_CMD="${CI_BUILD_DIR}/builds/pip.sh" PIP_TEST_TUTORIALS_FLAG="--test_tutorials" -- GitLab From bdbcde775f47d56a98b7f0f7dcd72bcb83867ae8 Mon Sep 17 00:00:00 2001 From: Mike Case Date: Thu, 5 Oct 2017 18:41:51 -0700 Subject: [PATCH 093/909] Fix small typo in docs of learn runner. --- tensorflow/contrib/learn/python/learn/learn_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/learn/python/learn/learn_runner.py b/tensorflow/contrib/learn/python/learn/learn_runner.py index 9f9740ec49..2af723a0d6 100644 --- a/tensorflow/contrib/learn/python/learn/learn_runner.py +++ b/tensorflow/contrib/learn/python/learn/learn_runner.py @@ -165,7 +165,7 @@ def run(experiment_fn, output_dir=None, schedule=None, run_config=None, must be None. 2) It accepts two arguments `run_config` and `hparams`, which should be used to create the `Estimator` (`run_config` passed as `config` to its - constructor; `hparams` used as the hyper-paremeters of the model). + constructor; `hparams` used as the hyper-parameters of the model). It must return an `Experiment`. For this case, `output_dir` must be None. output_dir: Base output directory [Deprecated]. schedule: The name of the method in the `Experiment` to run. -- GitLab From 86238e8d09efce59de038b062a230030aa8bdd3a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 5 Oct 2017 18:38:03 -0700 Subject: [PATCH 094/909] Track memory allocation/deallocation history. 
PiperOrigin-RevId: 171239477 --- .../python/kernel_tests/core_rnn_cell_test.py | 16 ++- .../rnn/python/kernel_tests/core_rnn_test.py | 26 ++-- .../core/common_runtime/direct_session.cc | 3 + tensorflow/core/common_runtime/executor.cc | 119 ++++++++++-------- .../common_runtime/step_stats_collector.cc | 99 +++++++++++---- .../common_runtime/step_stats_collector.h | 51 +++++++- tensorflow/core/distributed_runtime/worker.cc | 1 + .../worker_cache_logger.cc | 2 +- tensorflow/core/framework/step_stats.proto | 12 +- .../core/framework/tracking_allocator.cc | 20 ++- .../core/framework/tracking_allocator.h | 18 ++- .../core/framework/tracking_allocator_test.cc | 28 ++++- tensorflow/core/platform/gpu_tracer_test.cc | 1 + .../profiler/internal/run_metadata_test.py | 29 +++++ 14 files changed, 317 insertions(+), 108 deletions(-) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index deebadc142..8349188f6f 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -450,6 +450,17 @@ class RNNCellTest(test.TestCase): outputs, _ = cell(x, m) self.assertTrue("cpu:14159" in outputs.device.lower()) + def _retrieve_cpu_gpu_stats(self, run_metadata): + cpu_stats = None + gpu_stats = None + step_stats = run_metadata.step_stats + for ds in step_stats.dev_stats: + if "cpu:0" in ds.device[-5:].lower(): + cpu_stats = ds.node_stats + if "gpu:0" == ds.device[-5:].lower(): + gpu_stats = ds.node_stats + return cpu_stats, gpu_stats + def testDeviceWrapperDynamicExecutionNodesAreAllProperlyLocated(self): if not test.is_gpu_available(): # Can't perform this test w/o a GPU @@ -471,10 +482,7 @@ class RNNCellTest(test.TestCase): sess.run([variables_lib.global_variables_initializer()]) _ = sess.run(outputs, options=opts, run_metadata=run_metadata) - step_stats = run_metadata.step_stats - ix = 0 if gpu_dev in 
step_stats.dev_stats[0].device else 1 - gpu_stats = step_stats.dev_stats[ix].node_stats - cpu_stats = step_stats.dev_stats[1 - ix].node_stats + cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata) self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name]) self.assertTrue([s for s in gpu_stats if "gru_cell" in s.node_name]) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py index 40a3fb2fb0..2fa033632a 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py @@ -2203,6 +2203,17 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase): return run_metadata + def _retrieve_cpu_gpu_stats(self, run_metadata): + cpu_stats = None + gpu_stats = None + step_stats = run_metadata.step_stats + for ds in step_stats.dev_stats: + if "cpu:0" in ds.device[-5:].lower(): + cpu_stats = ds.node_stats + if "gpu:0" == ds.device[-5:].lower(): + gpu_stats = ds.node_stats + return cpu_stats, gpu_stats + def testRNNOnCPUCellOnGPU(self): if not test.is_gpu_available(): return # Test requires access to a GPU @@ -2210,10 +2221,7 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase): gpu_dev = test.gpu_device_name() run_metadata = self._execute_rnn_on( rnn_device="/cpu:0", cell_device=gpu_dev) - step_stats = run_metadata.step_stats - ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1 - gpu_stats = step_stats.dev_stats[ix].node_stats - cpu_stats = step_stats.dev_stats[1 - ix].node_stats + cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata) def _assert_in(op_str, in_stats, out_stats): self.assertTrue(any(op_str in s.node_name for s in in_stats)) @@ -2236,10 +2244,7 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase): run_metadata = self._execute_rnn_on( rnn_device="/cpu:0", cell_device="/cpu:0", input_device=gpu_dev) - step_stats = run_metadata.step_stats - ix = 0 if (gpu_dev in 
step_stats.dev_stats[0].device) else 1 - gpu_stats = step_stats.dev_stats[ix].node_stats - cpu_stats = step_stats.dev_stats[1 - ix].node_stats + cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata) def _assert_in(op_str, in_stats, out_stats): self.assertTrue(any(op_str in s.node_name for s in in_stats)) @@ -2255,10 +2260,7 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase): gpu_dev = test.gpu_device_name() run_metadata = self._execute_rnn_on( input_device=gpu_dev) - step_stats = run_metadata.step_stats - ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1 - gpu_stats = step_stats.dev_stats[ix].node_stats - cpu_stats = step_stats.dev_stats[1 - ix].node_stats + cpu_stats, gpu_stats = self._retrieve_cpu_gpu_stats(run_metadata) def _assert_in(op_str, in_stats, out_stats): self.assertTrue(any(op_str in s.node_name for s in in_stats)) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 8674831eac..316fb0ac16 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -652,6 +652,9 @@ Status DirectSession::Run(const RunOptions& run_options, // Save the output tensors of this run we choose to keep. TF_RETURN_IF_ERROR( run_state.tensor_store.SaveTensors(output_names, &session_state_)); + if (args.stats_collector) { + args.stats_collector->Finalize(); + } // Build and return the cost model as instructed. mutex_lock l(executor_lock_); diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index b1537eab01..f57834cfbe 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -74,10 +74,13 @@ bool IsInitializationOp(const Node* node) { // Returns true iff the node is a transfer node. // TODO(tucker): merge with the DetailText function in session.cc // in a common location. 
-bool SetTimelineLabel(const Node* node, NodeExecStats* node_stats) { +bool SetTimelineLabel(const Node* node, NodeExecStatsWrapper* stats) { bool is_transfer_node = false; + if (!stats) { + return is_transfer_node; + } string memory; - for (auto& all : node_stats->memory()) { + for (auto& all : stats->stats()->memory()) { int64 tot = all.total_bytes(); if (tot >= 0.1 * 1048576.0) { int64 peak = all.peak_bytes(); @@ -115,7 +118,7 @@ bool SetTimelineLabel(const Node* node, NodeExecStats* node_stats) { strings::StrCat(memory, node->name(), " = ", node->type_string(), "(", str_util::Join(node->requested_inputs(), ", "), ")"); } - node_stats->set_timeline_label(text); + stats->stats()->set_timeline_label(text); return is_transfer_node; } @@ -123,49 +126,52 @@ bool SetTimelineLabel(const Node* node, NodeExecStats* node_stats) { namespace nodestats { inline int64 NowInUsec() { return Env::Default()->NowMicros(); } -void SetScheduled(NodeExecStats* nt, int64 t) { nt->set_scheduled_micros(t); } +void SetScheduled(NodeExecStatsWrapper* stats, int64 t) { + if (!stats) return; + stats->stats()->set_scheduled_micros(t); +} -void SetAllStart(NodeExecStats* nt) { nt->set_all_start_micros(NowInUsec()); } +void SetAllStart(NodeExecStatsWrapper* stats) { + if (!stats) return; + stats->stats()->set_all_start_micros(NowInUsec()); +} -void SetOpStart(NodeExecStats* nt) { +void SetOpStart(NodeExecStatsWrapper* stats) { + if (!stats) return; + NodeExecStats* nt = stats->stats(); DCHECK_NE(nt->all_start_micros(), 0); nt->set_op_start_rel_micros(NowInUsec() - nt->all_start_micros()); } -void SetOpEnd(NodeExecStats* nt) { +void SetOpEnd(NodeExecStatsWrapper* stats) { + if (!stats) return; + NodeExecStats* nt = stats->stats(); DCHECK_NE(nt->all_start_micros(), 0); nt->set_op_end_rel_micros(NowInUsec() - nt->all_start_micros()); } -void SetAllEnd(NodeExecStats* nt) { +void SetAllEnd(NodeExecStatsWrapper* stats) { + if (!stats) return; + NodeExecStats* nt = stats->stats(); 
DCHECK_NE(nt->all_start_micros(), 0); nt->set_all_end_rel_micros(NowInUsec() - nt->all_start_micros()); } -void SetOutput(NodeExecStats* nt, int slot, const Tensor* v) { +void SetOutput(NodeExecStatsWrapper* stats, int slot, const Tensor* v) { + if (!stats) return; DCHECK(v); - NodeOutput* no = nt->add_output(); + NodeOutput* no = stats->stats()->add_output(); no->set_slot(slot); v->FillDescription(no->mutable_tensor_description()); } -void SetMemory(NodeExecStats* nt, OpKernelContext* ctx) { +void SetMemory(NodeExecStatsWrapper* stats, OpKernelContext* ctx) { + if (!stats) return; + for (const auto& allocator_pair : ctx->wrapped_allocators()) { - AllocatorMemoryUsed* memory = nt->add_memory(); - // retrieving the sizes from the wrapped allocator removes the - // executor's reference to it, so allocator_pair.second must not - // be dereferenced again after this statement - const auto sizes = allocator_pair.second->GetSizesAndUnRef(); - memory->set_allocator_name(allocator_pair.first->Name()); - memory->set_total_bytes(std::get<0>(sizes)); - memory->set_peak_bytes(std::get<1>(sizes)); - memory->set_live_bytes(std::get<2>(sizes)); - - AllocatorStats stats; - allocator_pair.first->GetStats(&stats); - memory->set_allocator_bytes_in_use(stats.bytes_in_use); - } - auto* ms = nt->mutable_memory_stats(); + stats->AddAllocation(allocator_pair.first, allocator_pair.second); + } + auto* ms = stats->stats()->mutable_memory_stats(); ms->set_host_temp_memory_size(ctx->host_temp_memory_size()); ms->set_device_temp_memory_size(ctx->device_temp_memory_size()); for (const auto& alloc_id : ctx->host_persistent_alloc_ids()) { @@ -179,12 +185,14 @@ void SetMemory(NodeExecStats* nt, OpKernelContext* ctx) { ctx->device_persistent_memory_allocated()); } -void SetReferencedTensors(NodeExecStats* nt, +void SetReferencedTensors(NodeExecStatsWrapper* stats, const TensorReferenceVector& tensors) { + if (!stats) return; // be careful not to increment the reference count on any tensor // while 
recording the information for (size_t i = 0; i < tensors.size(); ++i) { - AllocationDescription* description = nt->add_referenced_tensor(); + AllocationDescription* description = + stats->stats()->add_referenced_tensor(); tensors.at(i).FillDescription(description); } } @@ -1241,7 +1249,7 @@ class ExecutorState { // After item->kernel computation is done, processes its outputs. Status ProcessOutputs(const NodeItem& item, OpKernelContext* ctx, - EntryVector* outputs, NodeExecStats* stats); + EntryVector* outputs, NodeExecStatsWrapper* stats); // After processing the outputs, propagates the outputs to their dsts. // Contents of *outputs are left in an indeterminate state after @@ -1252,7 +1260,8 @@ class ExecutorState { // "node" just finishes. Takes ownership of "stats". Returns true if // execution has completed. bool NodeDone(const Status& s, const Node* node, const TaggedNodeSeq& ready, - NodeExecStats* stats, TaggedNodeReadyQueue* inline_ready); + NodeExecStatsWrapper* stats, + TaggedNodeReadyQueue* inline_ready); // Schedule all the expensive nodes in 'ready', and put all the inexpensive // nodes in 'ready' into 'inline_ready'. @@ -1448,7 +1457,8 @@ void ExecutorState::RunAsync(Executor::DoneCallback done) { // sync kernels because these vectors are kept on the stack. 
struct ExecutorState::AsyncState { AsyncState(const OpKernelContext::Params& p, const TaggedNode& _tagged_node, - const NodeItem* _item, Entry* _first_input, NodeExecStats* _stats) + const NodeItem* _item, Entry* _first_input, + NodeExecStatsWrapper* _stats) : saved_inputs(*p.inputs), saved_input_device_contexts(*p.input_device_contexts), saved_input_alloc_attrs(*p.input_alloc_attrs), @@ -1473,7 +1483,7 @@ struct ExecutorState::AsyncState { const NodeItem* item; Entry* first_input; OpKernelContext ctx; - NodeExecStats* stats; + NodeExecStatsWrapper* stats; private: OpKernelContext::Params* ParamsButClearingEigenGPUDevice( @@ -1517,7 +1527,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { params.stats_collector = stats_collector_; Status s; - NodeExecStats* stats = nullptr; + NodeExecStatsWrapper* stats = nullptr; EntryVector outputs; bool completed = false; inline_ready.push_back(tagged_node); @@ -1547,8 +1557,8 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { if (stats_collector_ && !tagged_node.is_dead) { // track allocations if and only if we are collecting statistics params.track_allocations = true; - stats = new NodeExecStats; - stats->set_node_name(node->name()); + stats = new NodeExecStatsWrapper; + stats->stats()->set_node_name(node->name()); nodestats::SetScheduled(stats, scheduled_usec); nodestats::SetAllStart(stats); } @@ -1604,17 +1614,17 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { auto done = [this, state]() { Device* device = impl_->params_.device; - NodeExecStats* stats = state->stats; // Shorthand + NodeExecStatsWrapper* stats = state->stats; // Shorthand Entry* first_input = state->first_input; // Shorthand if (vlog_) { VLOG(2) << this << " Async kernel done: " << SummarizeNode(*state->item->node); } - if (stats) nodestats::SetOpEnd(stats); + nodestats::SetOpEnd(stats); EntryVector outputs; Status s = ProcessOutputs(*state->item, &state->ctx, &outputs, 
stats); - if (stats) nodestats::SetMemory(stats, &state->ctx); + nodestats::SetMemory(stats, &state->ctx); // Clears inputs. const int num_inputs = state->item->num_inputs; for (int i = 0; i < num_inputs; ++i) { @@ -1633,7 +1643,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { // Get the list of all tensors accessed during the execution TensorReferenceVector accessed; state->ctx.retrieve_accessed_tensors(&accessed); - if (stats) nodestats::SetReferencedTensors(stats, accessed); + nodestats::SetReferencedTensors(stats, accessed); // callee takes ownership of the vector device->ConsumeListOfAccessedTensors(state->ctx.op_device_context(), accessed); @@ -1643,22 +1653,21 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { delete state; if (completed) Finish(); }; - if (stats) nodestats::SetOpStart(stats); + nodestats::SetOpStart(stats); device->ComputeAsync(async, &state->ctx, done); } else { // Synchronous computes. OpKernelContext ctx(¶ms, item.num_outputs); - if (stats) nodestats::SetOpStart(stats); + nodestats::SetOpStart(stats); device->Compute(CHECK_NOTNULL(op_kernel), &ctx); - if (stats) nodestats::SetOpEnd(stats); - + nodestats::SetOpEnd(stats); s = ProcessOutputs(item, &ctx, &outputs, stats); if (s.ok() && impl_->device_record_tensor_accesses_) { // Get the list of all tensors accessed during the execution ctx.retrieve_accessed_tensors(&accessed_tensors); device_context = ctx.op_device_context(); } - if (stats) nodestats::SetMemory(stats, &ctx); + nodestats::SetMemory(stats, &ctx); } } @@ -1675,7 +1684,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { } outputs.clear(); if (!accessed_tensors.empty()) { - if (stats) nodestats::SetReferencedTensors(stats, accessed_tensors); + nodestats::SetReferencedTensors(stats, accessed_tensors); // device_context is set above in synchronous computes device->ConsumeListOfAccessedTensors(device_context, accessed_tensors); } @@ -1772,7 
+1781,7 @@ Status ExecutorState::PrepareInputs(const NodeItem& item, Entry* first_input, Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx, EntryVector* outputs, - NodeExecStats* stats) { + NodeExecStatsWrapper* stats) { const Node* node = item.node; DCHECK_EQ(0, outputs->size()); outputs->resize(item.num_outputs); @@ -1995,16 +2004,16 @@ void ExecutorState::PropagateOutputs(const TaggedNode& tagged_node, } bool ExecutorState::NodeDone(const Status& s, const Node* node, - const TaggedNodeSeq& ready, NodeExecStats* stats, + const TaggedNodeSeq& ready, + NodeExecStatsWrapper* stats, TaggedNodeReadyQueue* inline_ready) { - if (stats) { - nodestats::SetAllEnd(stats); - if (!SetTimelineLabel(node, stats)) { - // Only record non-transfer nodes. - stats_collector_->Save(impl_->params_.device->name(), stats); - } else { - delete stats; - } + nodestats::SetAllEnd(stats); + if (!SetTimelineLabel(node, stats)) { + // Only record non-transfer nodes. + // Transfers 'stats' ownership to 'stats_collector_'. + stats_collector_->Save(impl_->params_.device->name(), stats); + } else if (stats) { + delete stats; } bool abort_run = false; diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc index ee12624074..e7f58f9ecf 100644 --- a/tensorflow/core/common_runtime/step_stats_collector.cc +++ b/tensorflow/core/common_runtime/step_stats_collector.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/step_stats_collector.h" #include "tensorflow/core/common_runtime/costmodel_manager.h" #include "tensorflow/core/framework/allocation_description.pb.h" -#include "tensorflow/core/framework/step_stats.pb.h" #include "tensorflow/core/framework/tensor_description.pb.h" +#include "tensorflow/core/framework/tracking_allocator.h" #include "tensorflow/core/graph/costmodel.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/scanner.h" @@ -25,7 +25,40 @@ limitations under the License. namespace tensorflow { -StepStatsCollector::StepStatsCollector(StepStats* ss) : step_stats_(ss) {} +NodeExecStatsWrapper::NodeExecStatsWrapper() + : NodeExecStatsWrapper(new NodeExecStats) {} +NodeExecStatsWrapper::NodeExecStatsWrapper(NodeExecStats* stats) + : stats_(stats) {} + +void NodeExecStatsWrapper::AddAllocation( + Allocator* allocator, TrackingAllocator* tracking_allocator) { + AllocatorMemoryUsed* memory = stats_->add_memory(); + memory->set_allocator_name(allocator->Name()); + auto sizes = tracking_allocator->GetSizes(); + memory->set_total_bytes(std::get<0>(sizes)); + memory->set_peak_bytes(std::get<1>(sizes)); + memory->set_live_bytes(std::get<2>(sizes)); + + AllocatorStats stats; + allocator->GetStats(&stats); + memory->set_allocator_bytes_in_use(stats.bytes_in_use); + allocations_.push_back(std::make_pair(memory, tracking_allocator)); +} + +void NodeExecStatsWrapper::Finalize() { + for (auto& alloc : allocations_) { + AllocatorMemoryUsed* memory = alloc.first; + for (auto& record : alloc.second->GetRecordsAndUnRef()) { + auto* r = memory->add_allocation_records(); + r->set_alloc_bytes(record.alloc_bytes); + r->set_alloc_micros(record.alloc_micros); + } + } + allocations_.clear(); +} + +StepStatsCollector::StepStatsCollector(StepStats* ss) + : finalized_(false), step_stats_(ss) {} static int ExtractGpuWithStreamAll(string device_name) { // Check if the device name matches the 
".*gpu:(\\d+)/stream:all$" regexp, @@ -92,6 +125,9 @@ void StepStatsCollector::BuildCostModel( const std::unordered_map& device_map) { mutex_lock lock(mu_); + if (!finalized_) { + FinalizeInternal(); + } // Hardware stats for gpu are available under a fake device named // "gpu:/stream::all. // Use them instead of regular stats whenever they're available to extract @@ -208,39 +244,60 @@ void StepStatsCollector::BuildCostModel( } void StepStatsCollector::Save(const string& device, NodeExecStats* nt) { - VLOG(1) << "Save dev " << device << " nt " << nt; + Save(device, new NodeExecStatsWrapper(nt)); +} + +void StepStatsCollector::Save(const string& device, + NodeExecStatsWrapper* stats) { + if (!stats) return; + VLOG(1) << "Save dev " << device << " nt " << stats->stats(); { mutex_lock l(mu_); + CHECK(!finalized_); if (!step_stats_ || collectedNodes >= kMaxCollectedNodes) { VLOG(1) << "step_stats_ nullptr or already collected too many nodes."; - delete nt; + delete stats; return; } - DeviceStepStats* dss = nullptr; - // Slow linear scan, but it should only be called - // by a Worker in a context with < ~10 devices. - // TODO(tucker): consider adding a std::unordered_map. 
- for (auto& ds : *step_stats_->mutable_dev_stats()) { - if (ds.device() == device) { - dss = &ds; - break; - } - } - if (dss == nullptr) { - dss = step_stats_->add_dev_stats(); - dss->set_device(device); - } - nt->Swap(dss->add_node_stats()); + auto& dss = dev_stats_[device]; + dss.push_back(std::unique_ptr(stats)); collectedNodes++; } - delete nt; } -void StepStatsCollector::Swap(StepStats* ss) { +void StepStatsCollector::Finalize() { + mutex_lock l(mu_); + FinalizeInternal(); +} + +void StepStatsCollector::FinalizeAndSwap(StepStats* ss) { mutex_lock l(mu_); CHECK(step_stats_); + FinalizeInternal(); ss->Swap(step_stats_); collectedNodes = 0; } +void StepStatsCollector::FinalizeInternal() { + if (!step_stats_ || finalized_) { + return; + } + finalized_ = true; + std::map dev_stats_pb; + for (auto& ds : *step_stats_->mutable_dev_stats()) { + dev_stats_pb[ds.device()] = &ds; + } + for (const auto& dev_stat : dev_stats_) { + if (dev_stats_pb.find(dev_stat.first) == dev_stats_pb.end()) { + DeviceStepStats* ndev_stat = step_stats_->add_dev_stats(); + ndev_stat->set_device(dev_stat.first); + dev_stats_pb[dev_stat.first] = ndev_stat; + } + DeviceStepStats* dss = dev_stats_pb.at(dev_stat.first); + for (auto& stats : dev_stat.second) { + stats->Finalize(); + stats->stats()->Swap(dss->add_node_stats()); + } + } +} } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/step_stats_collector.h b/tensorflow/core/common_runtime/step_stats_collector.h index 37b1c4b308..b1fd28a982 100644 --- a/tensorflow/core/common_runtime/step_stats_collector.h +++ b/tensorflow/core/common_runtime/step_stats_collector.h @@ -15,23 +15,59 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_STEP_STATS_COLLECTOR_H_ #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_STEP_STATS_COLLECTOR_H_ +#include #include +#include +#include "tensorflow/core/framework/step_stats.pb.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { +class Allocator; +class AllocatorMemoryUsed; class CostModelManager; class Graph; class NodeExecStats; class StepStats; +class TrackingAllocator; + +// Wraps NodeExecStats and adds allocation to it. +class NodeExecStatsWrapper { + public: + NodeExecStatsWrapper(); + // Owns 'stats'. + NodeExecStatsWrapper(NodeExecStats* stats); + + // Destructor calls Finalize() to release the TrackingAllocators. + ~NodeExecStatsWrapper() { Finalize(); } + + NodeExecStats* stats() { return stats_.get(); } + + // "Does not take ownership of the 'allocator'. + // Transfers ownership of the 'tracking_allocator' to *this." + void AddAllocation(Allocator* allocator, + TrackingAllocator* tracking_allocator); + + private: + friend class StepStatsCollector; + + // Populates stats_ and releases TrackingAllocator. + void Finalize(); + + gtl::InlinedVector, 2> + allocations_; + std::unique_ptr stats_; +}; // StepStatsCollector manages the collection of a StepStats object. // The StepStats object holds multiple DeviceStats. // Each DeviceStats object holds multiple NodeExecStats. class StepStatsCollector { public: + // Does not take ownership of `ss`. explicit StepStatsCollector(StepStats* ss); // BuildCostModel builds or updates a CostModel managed by cost_model_manager, @@ -42,16 +78,27 @@ class StepStatsCollector { const std::unordered_map& device_map); // Save saves nt to the DeviceStats object associated with device. + // Should be called before Finalize. 
void Save(const string& device, NodeExecStats* nt); + void Save(const string& device, NodeExecStatsWrapper* stats); - // Swap replaces the current step stats with ss. - void Swap(StepStats* ss); + // The following 2 Finalize methods populate the StepStats passed + // from the constructor. Calling it more than once won't have any effect. + // User shouldn't call Save() methods after Finalize. + void Finalize(); + // swaps the content of StepStats* from constructor with 'ss'. + void FinalizeAndSwap(StepStats* ss); private: + void FinalizeInternal() EXCLUSIVE_LOCKS_REQUIRED(mu_); + + typedef std::vector> NodeExecStatsVec; // TODO(suharshs): Make this configurable if its not possible to find a value // that works for all cases. const uint64 kMaxCollectedNodes = 1 << 20; mutex mu_; + bool finalized_ GUARDED_BY(mu_); + std::unordered_map dev_stats_ GUARDED_BY(mu_); StepStats* step_stats_ GUARDED_BY(mu_); uint64 collectedNodes GUARDED_BY(mu_) = 0; }; diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc index 94c1dd0a93..b7c5793736 100644 --- a/tensorflow/core/distributed_runtime/worker.cc +++ b/tensorflow/core/distributed_runtime/worker.cc @@ -179,6 +179,7 @@ void Worker::DoRunGraph(CallOptions* opts, RunGraphRequestWrapper* request, response->AddRecv(key, val); } } + if (collector) collector->Finalize(); delete collector; delete out; done(s); diff --git a/tensorflow/core/distributed_runtime/worker_cache_logger.cc b/tensorflow/core/distributed_runtime/worker_cache_logger.cc index 8e413b80f0..702af78c88 100644 --- a/tensorflow/core/distributed_runtime/worker_cache_logger.cc +++ b/tensorflow/core/distributed_runtime/worker_cache_logger.cc @@ -60,7 +60,7 @@ bool WorkerCacheLogger::RetrieveLogs(int64 step_id, StepStats* ss) { mutex_lock l(mu_); LogMap::iterator iter = log_map_.find(step_id); if (iter != log_map_.end()) { - iter->second.collector->Swap(ss); + iter->second.collector->FinalizeAndSwap(ss); delete 
iter->second.collector; log_map_.erase(iter); return true; diff --git a/tensorflow/core/framework/step_stats.proto b/tensorflow/core/framework/step_stats.proto index 3b3d62193c..99dee2257e 100644 --- a/tensorflow/core/framework/step_stats.proto +++ b/tensorflow/core/framework/step_stats.proto @@ -9,9 +9,13 @@ option java_package = "org.tensorflow.framework"; import "tensorflow/core/framework/allocation_description.proto"; import "tensorflow/core/framework/tensor_description.proto"; -// TODO(tucker): The next 4 message defs are very similar to -// the *LogEntry messages in profile.proto. They should be -// unified in one place. +// An allocation/de-allocation operation performed by the allocator. +message AllocationRecord { + // The timestamp of the operation. + int64 alloc_micros = 1; + // Number of bytes allocated, or de-allocated if negative. + int64 alloc_bytes = 2; +} message AllocatorMemoryUsed { string allocator_name = 1; @@ -20,6 +24,8 @@ message AllocatorMemoryUsed { int64 peak_bytes = 3; // The bytes that are not deallocated. int64 live_bytes = 4; + // The allocation and deallocation timeline. + repeated AllocationRecord allocation_records = 6; // These are snapshots of the overall allocator memory stats. // The number of live bytes currently allocated by the allocator. diff --git a/tensorflow/core/framework/tracking_allocator.cc b/tensorflow/core/framework/tracking_allocator.cc index 1052ac0554..db996e31b0 100644 --- a/tensorflow/core/framework/tracking_allocator.cc +++ b/tensorflow/core/framework/tracking_allocator.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tracking_allocator.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { @@ -44,6 +45,7 @@ void* TrackingAllocator::AllocateRaw( allocated_ += allocated_bytes; high_watermark_ = std::max(high_watermark_, allocated_); total_bytes_ += allocated_bytes; + allocations_.emplace_back(allocated_bytes, Env::Default()->NowMicros()); ++ref_; } } else if (track_sizes_locally_) { @@ -59,10 +61,12 @@ void* TrackingAllocator::AllocateRaw( allocated_ += allocated_bytes; high_watermark_ = std::max(high_watermark_, allocated_); total_bytes_ += allocated_bytes; + allocations_.emplace_back(allocated_bytes, Env::Default()->NowMicros()); ++ref_; } else { mutex_lock lock(mu_); total_bytes_ += num_bytes; + allocations_.emplace_back(num_bytes, Env::Default()->NowMicros()); ++ref_; } return ptr; @@ -95,6 +99,7 @@ void TrackingAllocator::DeallocateRaw(void* ptr) { if (tracks_allocation_sizes) { CHECK_GE(allocated_, allocated_bytes); allocated_ -= allocated_bytes; + allocations_.emplace_back(-allocated_bytes, Env::Default()->NowMicros()); } should_delete = UnRef(); } @@ -151,22 +156,31 @@ void TrackingAllocator::GetStats(AllocatorStats* stats) { allocator_->GetStats(stats); } -std::tuple TrackingAllocator::GetSizesAndUnRef() { +std::tuple TrackingAllocator::GetSizes() { size_t high_watermark; size_t total_bytes; size_t still_live_bytes; - bool should_delete; { mutex_lock lock(mu_); high_watermark = high_watermark_; total_bytes = total_bytes_; still_live_bytes = allocated_; + } + return std::make_tuple(total_bytes, high_watermark, still_live_bytes); +} + +gtl::InlinedVector TrackingAllocator::GetRecordsAndUnRef() { + bool should_delete; + gtl::InlinedVector allocations; + { + mutex_lock lock(mu_); + allocations.swap(allocations_); should_delete = UnRef(); } if (should_delete) { delete this; } - return std::make_tuple(total_bytes, high_watermark, still_live_bytes); + return allocations; } bool 
TrackingAllocator::UnRef() { diff --git a/tensorflow/core/framework/tracking_allocator.h b/tensorflow/core/framework/tracking_allocator.h index 92c89d30ac..d10b0cca51 100644 --- a/tensorflow/core/framework/tracking_allocator.h +++ b/tensorflow/core/framework/tracking_allocator.h @@ -18,7 +18,9 @@ limitations under the License. #include #include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/step_stats.pb.h" #include "tensorflow/core/lib/core/refcount.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/platform/types.h" @@ -42,6 +44,15 @@ namespace tensorflow { // TrackingAllocator keeps track of outstanding calls using a // reference count, and deletes itself once the last call has been // received and the high watermark has been retrieved. +struct AllocRecord { + AllocRecord(int64 a_btyes, int64 a_micros) + : alloc_bytes(a_btyes), alloc_micros(a_micros) {} + AllocRecord() : AllocRecord(0, 0) {} + + int64 alloc_bytes; + int64 alloc_micros; +}; + class TrackingAllocator : public Allocator { public: explicit TrackingAllocator(Allocator* allocator, bool track_ids); @@ -67,12 +78,13 @@ class TrackingAllocator : public Allocator { // value is the total number of bytes requested through this wrapper // and the second and the third are 0. // - // After GetSizesAndUnref is called, the only further calls allowed + std::tuple GetSizes(); + // After GetRecordsAndUnRef is called, the only further calls allowed // on this wrapper are calls to DeallocateRaw with pointers that // were allocated by this wrapper and have not yet been // deallocated. After this call completes and all allocated pointers // have been deallocated the wrapper will delete itself. 
- std::tuple GetSizesAndUnRef(); + gtl::InlinedVector GetRecordsAndUnRef(); protected: ~TrackingAllocator() override {} @@ -100,6 +112,8 @@ class TrackingAllocator : public Allocator { // this allocator. size_t total_bytes_ GUARDED_BY(mu_); + gtl::InlinedVector allocations_ GUARDED_BY(mu_); + // Track allocations locally if requested in the constructor and the // underlying allocator doesn't already do it for us. const bool track_sizes_locally_; diff --git a/tensorflow/core/framework/tracking_allocator_test.cc b/tensorflow/core/framework/tracking_allocator_test.cc index ae440cc28b..4e32a907f2 100644 --- a/tensorflow/core/framework/tracking_allocator_test.cc +++ b/tensorflow/core/framework/tracking_allocator_test.cc @@ -75,13 +75,16 @@ TEST(TrackingAllocatorTest, SimpleNoTracking) { ta->DeallocateRaw(p1); void* p2 = ta->AllocateRaw(4, 12); - std::tuple sizes = ta->GetSizesAndUnRef(); + std::tuple sizes = ta->GetSizes(); EXPECT_EQ(16, std::get<0>(sizes)); EXPECT_EQ(0, std::get<1>(sizes)); EXPECT_EQ(0, std::get<2>(sizes)); ta->DeallocateRaw(p2); + auto records = ta->GetRecordsAndUnRef(); + EXPECT_EQ(4, records[0].alloc_bytes); + EXPECT_EQ(12, records[1].alloc_bytes); // This time enable the tracking inside the tracking allocator ta = new TrackingAllocator(a, true); @@ -96,13 +99,18 @@ TEST(TrackingAllocatorTest, SimpleNoTracking) { EXPECT_LE(12, ta->AllocatedSize(p2)); EXPECT_EQ(2, ta->AllocationId(p2)); - sizes = ta->GetSizesAndUnRef(); + sizes = ta->GetSizes(); EXPECT_LE(16, std::get<0>(sizes)); EXPECT_LE(12, std::get<1>(sizes)); EXPECT_LE(12, std::get<2>(sizes)); ta->DeallocateRaw(p2); + records = ta->GetRecordsAndUnRef(); + EXPECT_LE(4, records[0].alloc_bytes); + EXPECT_GE(-4, records[1].alloc_bytes); + EXPECT_LE(12, records[2].alloc_bytes); + EXPECT_GE(-12, records[3].alloc_bytes); } TEST(TrackingAllocatorTest, SimpleTracking) { @@ -116,13 +124,19 @@ TEST(TrackingAllocatorTest, SimpleTracking) { ta->DeallocateRaw(p1); void* p2 = ta->AllocateRaw(4, 4); - 
std::tuple sizes = ta->GetSizesAndUnRef(); + std::tuple sizes = ta->GetSizes(); EXPECT_EQ(16, std::get<0>(sizes)); EXPECT_EQ(12, std::get<1>(sizes)); EXPECT_EQ(4, std::get<2>(sizes)); ta->DeallocateRaw(p2); + + auto records = ta->GetRecordsAndUnRef(); + EXPECT_EQ(12, records[0].alloc_bytes); + EXPECT_EQ(-12, records[1].alloc_bytes); + EXPECT_EQ(4, records[2].alloc_bytes); + EXPECT_EQ(-4, records[3].alloc_bytes); } TEST(TrackingAllocatorTest, OutOfMemory) { @@ -135,11 +149,13 @@ TEST(TrackingAllocatorTest, OutOfMemory) { void* p1 = ta->AllocateRaw(4, 12); EXPECT_EQ(nullptr, p1); - std::tuple sizes = ta->GetSizesAndUnRef(); + std::tuple sizes = ta->GetSizes(); EXPECT_EQ(0, std::get<0>(sizes)); EXPECT_EQ(0, std::get<1>(sizes)); EXPECT_EQ(0, std::get<2>(sizes)); + + EXPECT_EQ(0, ta->GetRecordsAndUnRef().size()); } TEST(TrackingAllocatorTest, FreeNullPtr) { @@ -151,11 +167,13 @@ TEST(TrackingAllocatorTest, FreeNullPtr) { ta->DeallocateRaw(nullptr); - std::tuple sizes = ta->GetSizesAndUnRef(); + std::tuple sizes = ta->GetSizes(); EXPECT_EQ(0, std::get<0>(sizes)); EXPECT_EQ(0, std::get<1>(sizes)); EXPECT_EQ(0, std::get<2>(sizes)); + + EXPECT_EQ(0, ta->GetRecordsAndUnRef().size()); } } // namespace tensorflow diff --git a/tensorflow/core/platform/gpu_tracer_test.cc b/tensorflow/core/platform/gpu_tracer_test.cc index f6c2c6cb37..ce2985fd47 100644 --- a/tensorflow/core/platform/gpu_tracer_test.cc +++ b/tensorflow/core/platform/gpu_tracer_test.cc @@ -195,6 +195,7 @@ TEST_F(GPUTracerTest, TraceToStepStatsCollector) { StepStats stats; StepStatsCollector collector(&stats); TF_ASSERT_OK(tracer->Collect(&collector)); + collector.Finalize(); // Depending on whether this runs on CPU or GPU, we will have a // different number of devices. 
EXPECT_GE(stats.dev_stats_size(), 1); diff --git a/tensorflow/python/profiler/internal/run_metadata_test.py b/tensorflow/python/profiler/internal/run_metadata_test.py index 80df44f5f5..4ff09d3800 100644 --- a/tensorflow/python/profiler/internal/run_metadata_test.py +++ b/tensorflow/python/profiler/internal/run_metadata_test.py @@ -121,6 +121,35 @@ class RunMetadataTest(test.TestCase): self.assertEqual(len(ret['gpu:0']), 1) self.assertEqual(len(ret['gpu:0/stream:all']), 1, '%s' % run_meta) + def testAllocationHistory(self): + if not test.is_gpu_available(cuda_only=True): + return + + gpu_dev = test.gpu_device_name() + ops.reset_default_graph() + with ops.device(gpu_dev): + _, run_meta = _run_model() + + mm = _extract_node(run_meta, 'MatMul')['gpu:0'][0] + mm_allocs = mm.memory[0].allocation_records + # has allocation and deallocation. + self.assertEqual(len(mm_allocs), 2) + # first allocated. + self.assertGreater(mm_allocs[1].alloc_micros, mm_allocs[0].alloc_micros) + self.assertGreater(mm_allocs[0].alloc_bytes, 0) + # Then deallocated. + self.assertLess(mm_allocs[1].alloc_bytes, 0) + # All memory deallocated. + self.assertEqual(mm_allocs[0].alloc_bytes + mm_allocs[1].alloc_bytes, 0) + + rand = _extract_node( + run_meta, 'random_normal/RandomStandardNormal')['gpu:0'][0] + random_allocs = rand.memory[0].allocation_records + # random normal must allocated first since matmul depends on it. + self.assertLess(random_allocs[0].alloc_micros, mm.all_start_micros) + # deallocates the memory after matmul started. + self.assertGreater(random_allocs[1].alloc_micros, mm.all_start_micros) + def testCPU(self): ops.reset_default_graph() with ops.device('/cpu:0'): -- GitLab From 7bb0592ef2f5ee4ac9261448daf51446cfc19941 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Thu, 5 Oct 2017 20:29:46 -0700 Subject: [PATCH 095/909] Remove setting AWS logging for S3 file system. Was causing issues with tests. Can repro test failures on Macs by running... 
bazel test --config=s3 --cache_test_results=no --test_output=streamed //tensorflow/core/kernels:control_flow_ops_test Possible reason for error is symbol collision with AWS logging code. One possible solution would be to split out another shared object for the S3 filesystem op which does not link in libtensorflow_framework.so. This is done, for example, by libforestprotos.so in tensorflow/contrib/tensor_forest/BUILD PiperOrigin-RevId: 171246381 --- tensorflow/contrib/s3/s3_file_system.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/contrib/s3/s3_file_system.cc b/tensorflow/contrib/s3/s3_file_system.cc index b09cf81d46..daced83145 100644 --- a/tensorflow/contrib/s3/s3_file_system.cc +++ b/tensorflow/contrib/s3/s3_file_system.cc @@ -222,7 +222,6 @@ class S3ReadOnlyMemoryRegion : public ReadOnlyMemoryRegion { S3FileSystem::S3FileSystem() { Aws::SDKOptions options; - options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Info; options.cryptoOptions.sha256Factory_create_fn = []() { return Aws::MakeShared(S3CryptoAllocationTag); }; @@ -234,7 +233,6 @@ S3FileSystem::S3FileSystem() { S3FileSystem::~S3FileSystem() { Aws::SDKOptions options; - options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Info; Aws::ShutdownAPI(options); } -- GitLab From 78af510b9aab4094a895851d61e2ea359a9b4985 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 5 Oct 2017 20:42:05 -0700 Subject: [PATCH 096/909] Temporarily don't error out if the requested device name cannot be parsed. 
PiperOrigin-RevId: 171246995 --- .../compiler/tf2xla/xla_compilation_device.cc | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/tf2xla/xla_compilation_device.cc b/tensorflow/compiler/tf2xla/xla_compilation_device.cc index 890a9ccb83..3814a2b8b9 100644 --- a/tensorflow/compiler/tf2xla/xla_compilation_device.cc +++ b/tensorflow/compiler/tf2xla/xla_compilation_device.cc @@ -98,20 +98,17 @@ void XlaCompilationDevice::Compute(OpKernel* op_kernel, b->SetOpMetadata(metadata); DeviceNameUtils::ParsedName parsed; - OP_REQUIRES( - context, - DeviceNameUtils::ParseFullName(op_kernel->requested_device(), &parsed), - errors::Internal("Unable to parse device name: ", - op_kernel->requested_device())); - xla::OpDeviceAssignment assignment; - // If no device ID assignment is found, XLA is free to use whatever device it - // wants. In practice this usually has the effect of placing things on - // device 0. - if (parsed.has_id) { - assignment.set_has_device(true); - assignment.set_device(parsed.id); + if (DeviceNameUtils::ParseFullName(op_kernel->requested_device(), &parsed)) { + // If no device ID assignment is found, XLA is free to use whatever device + // it wants. In practice this usually has the effect of placing things on + // device 0. + xla::OpDeviceAssignment assignment; + if (parsed.has_id) { + assignment.set_has_device(true); + assignment.set_device(parsed.id); + } + b->SetDeviceAssignment(assignment); } - b->SetDeviceAssignment(assignment); op_kernel->Compute(context); -- GitLab From 825a9f8d9a4cc3cce7cee2fb08dcc058b5a8e2a8 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Fri, 6 Oct 2017 05:36:08 -0700 Subject: [PATCH 097/909] [TF:XLA] Make registration of an XlaDevice for autoclustering optional. 
PiperOrigin-RevId: 171281666 --- .../compiler/jit/mark_for_compilation_pass.cc | 1 + tensorflow/compiler/jit/xla_cpu_device.cc | 6 +++--- tensorflow/compiler/jit/xla_device.cc | 21 +++++++++++-------- tensorflow/compiler/jit/xla_device.h | 1 + tensorflow/compiler/jit/xla_gpu_device.cc | 6 +++--- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index db2ed16f95..78d0aa86a8 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -560,6 +560,7 @@ Status MarkForCompilationPass::RunImpl( name = strings::StrCat("cluster_", cluster_sequence_num++); } n->AddAttr(kXlaClusterAttr, name); + VLOG(3) << "Assigning node " << n->name() << " to cluster " << name; } } diff --git a/tensorflow/compiler/jit/xla_cpu_device.cc b/tensorflow/compiler/jit/xla_cpu_device.cc index 57b9d6b56b..2e33fdca65 100644 --- a/tensorflow/compiler/jit/xla_cpu_device.cc +++ b/tensorflow/compiler/jit/xla_cpu_device.cc @@ -39,9 +39,9 @@ Status XlaCpuDeviceFactory::CreateDevices(const SessionOptions& options, (void)registrations; std::unique_ptr device; - TF_RETURN_IF_ERROR(XlaDevice::Create("Host", DEVICE_XLA_CPU, 0, - DEVICE_CPU_XLA_JIT, options, name_prefix, - &device)); + TF_RETURN_IF_ERROR(XlaDevice::Create( + "Host", DEVICE_XLA_CPU, 0, DEVICE_CPU_XLA_JIT, options, name_prefix, + /*register_device_for_compilation=*/true, &device)); devices->push_back(device.release()); return Status::OK(); } diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index 888461611f..a2c91511ec 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -107,18 +107,21 @@ XlaDeviceAllocator* XlaDeviceAllocatorState::GetOrCreateXlaDeviceAllocator( /* static */ Status XlaDevice::Create( const string& platform_name, const string& device_name, int device_ordinal, 
const string& jit_device_name, const SessionOptions& options, - const string& name_prefix, std::unique_ptr* device) { + const string& name_prefix, bool register_device_for_compilation, + std::unique_ptr* device) { VLOG(1) << "XlaDevice::Create " << platform_name << " " << device_name << ":" << device_ordinal; - // These are no-ops if they have already been done previously for - // this device_name/compilation_device_name pair. - XlaOpRegistry::DeviceRegistration registration; - registration.compilation_device_name = jit_device_name; - registration.requires_compilation = true; - registration.enable_jit_by_default = false; - registration.compile_resource_ops = true; - XlaOpRegistry::RegisterCompilationDevice(device_name, registration); + if (register_device_for_compilation) { + // These are no-ops if they have already been done previously for + // this device_name/compilation_device_name pair. + XlaOpRegistry::DeviceRegistration registration; + registration.compilation_device_name = jit_device_name; + registration.requires_compilation = true; + registration.enable_jit_by_default = false; + registration.compile_resource_ops = true; + XlaOpRegistry::RegisterCompilationDevice(device_name, registration); + } auto platform = se::MultiPlatformManager::PlatformWithName(platform_name); if (!platform.ok()) { diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h index 0d90b8b692..d2ec38293c 100644 --- a/tensorflow/compiler/jit/xla_device.h +++ b/tensorflow/compiler/jit/xla_device.h @@ -74,6 +74,7 @@ class XlaDevice : public LocalDevice { static Status Create(const string& platform_name, const string& device_name, int device_ordinal, const string& jit_device_name, const SessionOptions& options, const string& name_prefix, + bool register_device_for_compilation, std::unique_ptr* device); XlaDevice(const SessionOptions& options, const DeviceAttributes& attrs, diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc 
b/tensorflow/compiler/jit/xla_gpu_device.cc index 4474d8f4eb..5233665ec2 100644 --- a/tensorflow/compiler/jit/xla_gpu_device.cc +++ b/tensorflow/compiler/jit/xla_gpu_device.cc @@ -39,9 +39,9 @@ Status XlaGpuDeviceFactory::CreateDevices(const SessionOptions& options, (void)registrations; std::unique_ptr device; - Status status = - XlaDevice::Create("CUDA", DEVICE_XLA_GPU, 0, DEVICE_GPU_XLA_JIT, options, - name_prefix, &device); + Status status = XlaDevice::Create( + "CUDA", DEVICE_XLA_GPU, 0, DEVICE_GPU_XLA_JIT, options, name_prefix, + /*register_device_for_compilation=*/true, &device); if (!status.ok()) { // Treat failures as non-fatal; there might not be a GPU in the machine. VLOG(1) << "Failed to create XLA_GPU device: " << status; -- GitLab From ed2970634444d423261fd7b094084124ccc4f755 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 06:45:38 -0700 Subject: [PATCH 098/909] Include resource variable ops in tensorflow/core:ops build target. PiperOrigin-RevId: 171286346 --- tensorflow/core/BUILD | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index c1b103c98b..eb66d8e329 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -651,14 +651,15 @@ cc_library( ":image_ops_op_lib", ":io_ops_op_lib", ":linalg_ops_op_lib", - ":lookup_ops_op_lib", ":logging_ops_op_lib", + ":lookup_ops_op_lib", ":math_ops_op_lib", ":nn_ops_op_lib", ":no_op_op_lib", ":parsing_ops_op_lib", ":random_ops_op_lib", ":remote_fused_graph_ops_op_lib", + ":resource_variable_ops_op_lib", ":script_ops_op_lib", ":sdca_ops_op_lib", ":sendrecv_ops_op_lib", -- GitLab From 0cfdb855483d98a8c42f078bae9b00281d05633a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 07:06:26 -0700 Subject: [PATCH 099/909] Update ops-related pbtxt files. 
PiperOrigin-RevId: 171288134 --- .../core/ops/compat/ops_history.v1.pbtxt | 231 +++++++++++++++ tensorflow/core/ops/ops.pbtxt | 271 ++++++++++++++++++ 2 files changed, 502 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 950422305e..a3321c26f3 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -2061,6 +2061,22 @@ op { } } } +op { + name: "AssignAddVariableOp" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "value" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + is_stateful: true +} op { name: "AssignSub" input_arg { @@ -2107,6 +2123,38 @@ op { } } } +op { + name: "AssignSubVariableOp" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "value" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + is_stateful: true +} +op { + name: "AssignVariableOp" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "value" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + is_stateful: true +} op { name: "Atan" input_arg { @@ -7622,6 +7670,21 @@ op { type: "type" } } +op { + name: "DestroyResourceOp" + input_arg { + name: "resource" + type: DT_RESOURCE + } + attr { + name: "ignore_lookup_error" + type: "bool" + default_value { + b: true + } + } + is_stateful: true +} op { name: "DestroyTemporaryVariable" input_arg { @@ -20716,6 +20779,22 @@ op { type: DT_STRING } } +op { + name: "ReadVariableOp" + input_arg { + name: "resource" + type: DT_RESOURCE + } + output_arg { + name: "value" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + is_stateful: true +} op { name: "ReaderNumRecordsProduced" input_arg { @@ -22741,6 +22820,91 @@ op { } is_stateful: true } +op { + name: "ResourceGather" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + 
type_attr: "Tindices" + } + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "validate_indices" + type: "bool" + default_value { + b: true + } + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} +op { + name: "ResourceScatterAdd" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} op { name: "ResourceSparseApplyAdadelta" input_arg { @@ -32719,6 +32883,48 @@ op { } is_stateful: true } +op { + name: "VarHandleOp" + output_arg { + name: "resource" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + is_stateful: true +} +op { + name: "VarIsInitializedOp" + input_arg { + name: "resource" + type: DT_RESOURCE + } + output_arg { + name: "is_initialized" + type: DT_BOOL + } + is_stateful: true +} op { name: "Variable" output_arg { @@ -32750,6 +32956,31 @@ op { } is_stateful: true } +op { + name: "VariableShape" + input_arg { + name: "input" + type: DT_RESOURCE + } + output_arg { + name: "output" + type_attr: "out_type" + } + attr { + name: "out_type" + type: "type" + default_value { + type: DT_INT32 + } + 
allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} op { name: "VariableV2" output_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index cbde462325..429000a058 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -2039,6 +2039,27 @@ op { summary: "Update \'ref\' by adding \'value\' to it." description: "This operation outputs \"ref\" after the update is done.\nThis makes it easier to chain operations that need to use the reset value." } +op { + name: "AssignAddVariableOp" + input_arg { + name: "resource" + description: "handle to the resource in which to store the variable." + type: DT_RESOURCE + } + input_arg { + name: "value" + description: "the value by which the variable will be incremented." + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + description: "the dtype of the value." + } + summary: "Adds a value to the current value of a variable." + description: "Any ReadVariableOp which depends directly or indirectly on this assign is\nguaranteed to see the incremented value or a subsequent newer one.\n\nOutputs the incremented value, which can be used to totally order the\nincrements to this variable." + is_stateful: true +} op { name: "AssignSub" input_arg { @@ -2091,6 +2112,48 @@ op { summary: "Update \'ref\' by subtracting \'value\' from it." description: "This operation outputs \"ref\" after the update is done.\nThis makes it easier to chain operations that need to use the reset value." } +op { + name: "AssignSubVariableOp" + input_arg { + name: "resource" + description: "handle to the resource in which to store the variable." + type: DT_RESOURCE + } + input_arg { + name: "value" + description: "the value by which the variable will be incremented." + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + description: "the dtype of the value." + } + summary: "Subtracts a value from the current value of a variable." 
+ description: "Any ReadVariableOp which depends directly or indirectly on this assign is\nguaranteed to see the incremented value or a subsequent newer one.\n\nOutputs the incremented value, which can be used to totally order the\nincrements to this variable." + is_stateful: true +} +op { + name: "AssignVariableOp" + input_arg { + name: "resource" + description: "handle to the resource in which to store the variable." + type: DT_RESOURCE + } + input_arg { + name: "value" + description: "the value to set the new tensor to use." + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + description: "the dtype of the value." + } + summary: "Assigns a new value to a variable." + description: "Any ReadVariableOp with a control dependency on this op is guaranteed to return\nthis value or a subsequent newer value of the variable." + is_stateful: true +} op { name: "Atan" input_arg { @@ -6829,6 +6892,25 @@ op { summary: "Deserialize and concatenate `SparseTensors` from a serialized minibatch." description: "The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where\n`N` is the minibatch size and the rows correspond to packed outputs of\n`SerializeSparse`. The ranks of the original `SparseTensor` objects\nmust all match. When the final `SparseTensor` is created, it has rank one\nhigher than the ranks of the incoming `SparseTensor` objects\n(they have been concatenated along a new row dimension).\n\nThe output `SparseTensor` object\'s shape values for all dimensions but the\nfirst are the max across the input `SparseTensor` objects\' shape values\nfor the corresponding dimensions. Its first shape value is `N`, the minibatch\nsize.\n\nThe input `SparseTensor` objects\' indices are assumed ordered in\nstandard lexicographic order. 
If this is not the case, after this\nstep run `SparseReorder` to restore index ordering.\n\nFor example, if the serialized input is a `[2 x 3]` matrix representing two\noriginal `SparseTensor` objects:\n\n index = [ 0]\n [10]\n [20]\n values = [1, 2, 3]\n shape = [50]\n\nand\n\n index = [ 2]\n [10]\n values = [4, 5]\n shape = [30]\n\nthen the final deserialized `SparseTensor` will be:\n\n index = [0 0]\n [0 10]\n [0 20]\n [1 2]\n [1 10]\n values = [1, 2, 3, 4, 5]\n shape = [2 50]" } +op { + name: "DestroyResourceOp" + input_arg { + name: "resource" + description: "handle to the resource to delete." + type: DT_RESOURCE + } + attr { + name: "ignore_lookup_error" + type: "bool" + default_value { + b: true + } + description: "whether to ignore the error when the resource\ndoesn\'t exist." + } + summary: "Deletes the resource specified by the handle." + description: "All subsequent operations using the resource will result in a NotFound\nerror status." + is_stateful: true +} op { name: "DestroyTemporaryVariable" input_arg { @@ -19351,6 +19433,26 @@ op { } summary: "Reads and outputs the entire contents of the input filename." } +op { + name: "ReadVariableOp" + input_arg { + name: "resource" + description: "handle to the resource in which to store the variable." + type: DT_RESOURCE + } + output_arg { + name: "value" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + description: "the dtype of the value." + } + summary: "Reads the value of a variable." + description: "The tensor returned by this operation is immutable.\n\nThe value returned by this operation is guaranteed to be influenced by all the\nwrites on which this operation depends directly or indirectly, and to not be\ninfluenced by any of the writes which depend directly or indirectly on this\noperation." 
+ is_stateful: true +} op { name: "ReaderNumRecordsProduced" input_arg { @@ -21551,6 +21653,98 @@ op { description: "Note that in dense implementation of this algorithm, ms and mom will\nupdate even if the grad is zero, but in this sparse implementation, ms\nand mom will not update in iterations during which the grad is zero.\n\nmean_square = decay * mean_square + (1-decay) * gradient ** 2\nDelta = learning_rate * gradient / sqrt(mean_square + epsilon)\n\nms <- rho * ms_{t-1} + (1-rho) * grad * grad\nmom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)\nvar <- var - mom" is_stateful: true } +op { + name: "ResourceGather" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "validate_indices" + type: "bool" + default_value { + b: true + } + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + summary: "Gather slices from the variable pointed to by `resource` according to `indices`." + description: "`indices` must be an integer tensor of any dimension (usually 0-D or 1-D).\nProduces an output tensor with shape `indices.shape + params.shape[1:]` where:\n\n```python\n # Scalar indices\n output[:, ..., :] = params[indices, :, ... :]\n\n # Vector indices\n output[i, :, ..., :] = params[indices[i], :, ... :]\n\n # Higher rank indices\n output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :]\n```" + is_stateful: true +} +op { + name: "ResourceScatterAdd" + input_arg { + name: "resource" + description: "Should be from a `Variable` node." + type: DT_RESOURCE + } + input_arg { + name: "indices" + description: "A tensor of indices into the first dimension of `ref`." + type_attr: "Tindices" + } + input_arg { + name: "updates" + description: "A tensor of updated values to add to `ref`." 
+ type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + summary: "Adds sparse updates to the variable referenced by `resource`." + description: "This operation computes\n\n # Scalar indices\n ref[indices, ...] += updates[...]\n\n # Vector indices (for each i)\n ref[indices[i], ...] += updates[i, ...]\n\n # High rank indices (for each i, ..., j)\n ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]\n\nDuplicate entries are handled correctly: if multiple `indices` reference\nthe same location, their contributions add.\n\nRequires `updates.shape = indices.shape + ref.shape[1:]`.\n\n
\n\n
" + is_stateful: true +} op { name: "ResourceSparseApplyAdadelta" input_arg { @@ -31795,6 +31989,56 @@ op { description: "The basic functionality is similar to dequeue with many fewer\ncapabilities and options. This Op is optimized for performance." is_stateful: true } +op { + name: "VarHandleOp" + output_arg { + name: "resource" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + description: "the container this variable is placed in." + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + description: "the name by which this variable is referred to." + } + attr { + name: "dtype" + type: "type" + description: "the type of this variable. Must agree with the dtypes\nof all ops using this variable." + } + attr { + name: "shape" + type: "shape" + description: "The (possibly partially specified) shape of this variable." + } + summary: "Creates a handle to a Variable resource." + is_stateful: true +} +op { + name: "VarIsInitializedOp" + input_arg { + name: "resource" + description: "the input resource handle." + type: DT_RESOURCE + } + output_arg { + name: "is_initialized" + description: "a scalar boolean which is true if the variable has been\ninitialized." + type: DT_BOOL + } + summary: "Checks whether a resource handle-based variable has been initialized." + is_stateful: true +} op { name: "Variable" output_arg { @@ -31827,6 +32071,33 @@ op { summary: "Use VariableV2 instead." is_stateful: true } +op { + name: "VariableShape" + input_arg { + name: "input" + type: DT_RESOURCE + } + output_arg { + name: "output" + type_attr: "out_type" + } + attr { + name: "out_type" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + summary: "Returns the shape of the variable pointed to by `resource`." 
+ description: "This operation returns a 1-D integer tensor representing the shape of `input`.\n\nFor example:\n\n```\n# \'t\' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]\nshape(t) ==> [2, 2, 3]\n```" + is_stateful: true +} op { name: "VariableV2" output_arg { -- GitLab From bbf1085651fab743d17f74dde622c8d89ebbc102 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 07:12:43 -0700 Subject: [PATCH 100/909] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 171288708 --- tensorflow/go/op/wrappers.go | 4846 +++++++++++++++++----------------- 1 file changed, 2423 insertions(+), 2423 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index ef1f8a9df6..29c69b3c59 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -38,156 +38,6 @@ func makeOutputList(op *tf.Operation, start int, output string) ([]tf.Output, in return list, start + size, nil } -// Adds sparse updates to the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] += updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] += updates[i, ...] -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] -// -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions add. -// -// Requires `updates.shape = indices.shape + ref.shape[1:]`. -// -//
-// -//
-// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. -// -// Returns the created operation. -func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceScatterAdd", - Input: []tf.Input{ - resource, indices, updates, - }, - } - return scope.AddOperation(opspec) -} - -// VariableShapeAttr is an optional argument to VariableShape. -type VariableShapeAttr func(optionalAttr) - -// VariableShapeOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func VariableShapeOutType(value tf.DataType) VariableShapeAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Returns the shape of the variable pointed to by `resource`. -// -// This operation returns a 1-D integer tensor representing the shape of `input`. -// -// For example: -// -// ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// shape(t) ==> [2, 2, 3] -// ``` -func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "VariableShape", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Assigns a new value to a variable. -// -// Any ReadVariableOp with a control dependency on this op is guaranteed to return -// this value or a subsequent newer value of the variable. -// -// Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value to set the new tensor to use. -// -// Returns the created operation. 
-func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AssignVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} - -// VarHandleOpAttr is an optional argument to VarHandleOp. -type VarHandleOpAttr func(optionalAttr) - -// VarHandleOpContainer sets the optional container attribute to value. -// -// value: the container this variable is placed in. -// If not specified, defaults to "" -func VarHandleOpContainer(value string) VarHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// VarHandleOpSharedName sets the optional shared_name attribute to value. -// -// value: the name by which this variable is referred to. -// If not specified, defaults to "" -func VarHandleOpSharedName(value string) VarHandleOpAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a handle to a Variable resource. -// -// Arguments: -// dtype: the type of this variable. Must agree with the dtypes -// of all ops using this variable. -// shape: The (possibly partially specified) shape of this variable. -func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "VarHandleOp", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Writes a `Summary` protocol buffer with scalar values. // // The input `tag` and `value` must have the scalars. @@ -4047,73 +3897,6 @@ func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value t return op.Output(0) } -// ResourceGatherAttr is an optional argument to ResourceGather. 
-type ResourceGatherAttr func(optionalAttr) - -// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Gather slices from the variable pointed to by `resource` according to `indices`. -// -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: -// -// ```python -// # Scalar indices -// output[:, ..., :] = params[indices, :, ... :] -// -// # Vector indices -// output[i, :, ..., :] = params[indices[i], :, ... :] -// -// # Higher rank indices -// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] -// ``` -func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceGather", - Input: []tf.Input{ - resource, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Delete the TensorArray from its resource container. -// -// This enables the user to close and release the resource in the middle -// of a step/run. -// -// Arguments: -// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). -// -// Returns the created operation. -func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArrayCloseV3", - Input: []tf.Input{ - handle, - }, - } - return scope.AddOperation(opspec) -} - // Get the current size of the TensorArray. 
// // Arguments: @@ -7697,40 +7480,265 @@ func MergeV2Checkpoints(scope *Scope, checkpoint_prefixes tf.Output, destination return scope.AddOperation(opspec) } -// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad. -type FusedBatchNormGradAttr func(optionalAttr) +// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2. +type QueueEnqueueManyV2Attr func(optionalAttr) -// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value. +// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value. // -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr { +// value: If the queue is too full, this operation will block for up +// to timeout_ms milliseconds. +// Note: This option is not supported yet. +// If not specified, defaults to -1 +func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr { return func(m optionalAttr) { - m["epsilon"] = value + m["timeout_ms"] = value } } -// FusedBatchNormGradDataFormat sets the optional data_format attribute to value. +// Enqueues zero or more tuples of one or more tensors in the given queue. // -// value: The data format for y_backprop, x, x_backprop. -// Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// FusedBatchNormGradIsTraining sets the optional is_training attribute to value. +// This operation slices each component tensor along the 0th dimension to +// make multiple queue elements. All of the tuple components must have the +// same size in the 0th dimension. // -// value: A bool value to indicate the operation is for training (default) -// or inference. 
-// If not specified, defaults to true -func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["is_training"] = value - } -} +// The components input has k elements, which correspond to the components of +// tuples stored in the given queue. +// +// N.B. If the queue is full, this operation will block until the given +// elements have been enqueued (or 'timeout_ms' elapses, if specified). +// +// Arguments: +// handle: The handle to a queue. +// components: One or more tensors from which the enqueued tensors should +// be taken. +// +// Returns the created operation. +func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QueueEnqueueManyV2", + Input: []tf.Input{ + handle, tf.OutputList(components), + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// SvdAttr is an optional argument to Svd. +type SvdAttr func(optionalAttr) + +// SvdComputeUv sets the optional compute_uv attribute to value. +// +// value: If true, left and right singular vectors will be +// computed and returned in `u` and `v`, respectively. +// If false, `u` and `v` are not set and should never referenced. +// If not specified, defaults to true +func SvdComputeUv(value bool) SvdAttr { + return func(m optionalAttr) { + m["compute_uv"] = value + } +} + +// SvdFullMatrices sets the optional full_matrices attribute to value. +// +// value: If true, compute full-sized `u` and `v`. If false +// (the default), compute only the leading `P` singular vectors. +// Ignored if `compute_uv` is `False`. 
+// If not specified, defaults to false +func SvdFullMatrices(value bool) SvdAttr { + return func(m optionalAttr) { + m["full_matrices"] = value + } +} + +// Computes the singular value decompositions of one or more matrices. +// +// Computes the SVD of each inner matrix in `input` such that +// `input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])` +// +// ```python +// # a is a tensor containing a batch of matrices. +// # s is a tensor of singular values for each matrix. +// # u is the tensor containing of left singular vectors for each matrix. +// # v is the tensor containing of right singular vectors for each matrix. +// s, u, v = svd(a) +// s, _, _ = svd(a, compute_uv=False) +// ``` +// +// Arguments: +// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. +// +// Returns Singular values. Shape is `[..., P]`.Left singular vectors. If `full_matrices` is `False` then shape is +// `[..., M, P]`; if `full_matrices` is `True` then shape is +// `[..., M, M]`. Undefined if `compute_uv` is `False`.Left singular vectors. If `full_matrices` is `False` then shape is +// `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`. +// Undefined if `compute_uv` is false. +func Svd(scope *Scope, input tf.Output, optional ...SvdAttr) (s tf.Output, u tf.Output, v tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Svd", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Converts one or more images from RGB to HSV. +// +// Outputs a tensor of the same shape as the `images` tensor, containing the HSV +// value of the pixels. The output is only well defined if the value in `images` +// are in `[0,1]`. 
+// +// `output[..., 0]` contains hue, `output[..., 1]` contains saturation, and +// `output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0 +// corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue. +// +// Arguments: +// images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3. +// +// Returns `images` converted to HSV. +func RGBToHSV(scope *Scope, images tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "RGBToHSV", + Input: []tf.Input{ + images, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. +type MatrixSolveLsAttr func(optionalAttr) + +// MatrixSolveLsFast sets the optional fast attribute to value. +// If not specified, defaults to true +func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { + return func(m optionalAttr) { + m["fast"] = value + } +} + +// Solves one or more linear least-squares problems. +// +// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same +// type as `matrix` and shape `[..., M, K]`. +// The output is a tensor shape `[..., N, K]` where each output matrix solves +// each of the equations +// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` +// in the least squares sense. +// +// We use the following notation for (complex) matrix and right-hand sides +// in the batch: +// +// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), +// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), +// `output`=\\(X \in \mathbb{C}^{n \times k}\\), +// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). +// +// If `fast` is `True`, then the solution is computed by solving the normal +// equations using Cholesky decomposition. 
Specifically, if \\(m \ge n\\) then +// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares +// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + +// \lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as +// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the +// minimum-norm solution to the under-determined linear system, i.e. +// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), +// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable +// when \\(A\\) is numerically full rank and has a condition number +// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is +// sufficiently large. +// +// If `fast` is `False` an algorithm based on the numerically robust complete +// orthogonal decomposition is used. This computes the minimum-norm +// least-squares solution, even when \\(A\\) is rank deficient. This path is +// typically 6-7 times slower than the fast path. If `fast` is `False` then +// `l2_regularizer` is ignored. +// +// Arguments: +// matrix: Shape is `[..., M, N]`. +// rhs: Shape is `[..., M, K]`. +// l2_regularizer: Scalar tensor. +// +// @compatibility(numpy) +// Equivalent to np.linalg.lstsq +// @end_compatibility +// +// Returns Shape is `[..., N, K]`. +func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MatrixSolveLs", + Input: []tf.Input{ + matrix, rhs, l2_regularizer, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad. +type FusedBatchNormGradAttr func(optionalAttr) + +// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value. 
+// +// value: A small float number added to the variance of x. +// If not specified, defaults to 0.0001 +func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr { + return func(m optionalAttr) { + m["epsilon"] = value + } +} + +// FusedBatchNormGradDataFormat sets the optional data_format attribute to value. +// +// value: The data format for y_backprop, x, x_backprop. +// Either "NHWC" (default) or "NCHW". +// If not specified, defaults to "NHWC" +func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// FusedBatchNormGradIsTraining sets the optional is_training attribute to value. +// +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr { + return func(m optionalAttr) { + m["is_training"] = value + } +} // Gradient for batch normalization. // @@ -9346,41 +9354,12 @@ func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, opt return output } -// Convert JSON-encoded Example records to binary protocol buffer strings. -// -// This op translates a tensor containing Example records, encoded using -// the [standard JSON -// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json), -// into a tensor containing the same records encoded as binary protocol -// buffers. The resulting tensor can then be fed to any of the other -// Example-parsing ops. +// Transforms a serialized tensorflow.TensorProto proto into a Tensor. // // Arguments: -// json_examples: Each string is a JSON object serialized according to the JSON -// mapping of the Example proto. -// -// Returns Each string is a binary Example protocol buffer corresponding -// to the respective element of `json_examples`. 
-func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DecodeJSONExample", - Input: []tf.Input{ - json_examples, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Transforms a serialized tensorflow.TensorProto proto into a Tensor. -// -// Arguments: -// serialized: A scalar string containing a serialized TensorProto proto. -// out_type: The type of the serialized tensor. The provided type must match the -// type of the serialized tensor and no implicit conversion will take place. +// serialized: A scalar string containing a serialized TensorProto proto. +// out_type: The type of the serialized tensor. The provided type must match the +// type of the serialized tensor and no implicit conversion will take place. // // Returns A Tensor of type `out_type`. func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { @@ -9521,172 +9500,157 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ... return op.Output(0) } -// QueueDequeueV2Attr is an optional argument to QueueDequeueV2. -type QueueDequeueV2Attr func(optionalAttr) +// OrderedMapIncompleteSizeAttr is an optional argument to OrderedMapIncompleteSize. +type OrderedMapIncompleteSizeAttr func(optionalAttr) -// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value. +// OrderedMapIncompleteSizeCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: If the queue is empty, this operation will block for up to -// timeout_ms milliseconds. -// Note: This option is not supported yet. 
-// If not specified, defaults to -1 -func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr { +// REQUIRES: value >= 0 +func OrderedMapIncompleteSizeCapacity(value int64) OrderedMapIncompleteSizeAttr { return func(m optionalAttr) { - m["timeout_ms"] = value + m["capacity"] = value } } -// Dequeues a tuple of one or more tensors from the given queue. -// -// This operation has k outputs, where k is the number of components -// in the tuples stored in the given queue, and output i is the ith -// component of the dequeued tuple. -// -// N.B. If the queue is empty, this operation will block until an element -// has been dequeued (or 'timeout_ms' elapses, if specified). -// -// Arguments: -// handle: The handle to a queue. -// component_types: The type of each component in a tuple. +// OrderedMapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Returns One or more tensors that were dequeued as a tuple. -func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) { +// REQUIRES: value >= 0 +func OrderedMapIncompleteSizeMemoryLimit(value int64) OrderedMapIncompleteSizeAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapIncompleteSizeContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func OrderedMapIncompleteSizeContainer(value string) OrderedMapIncompleteSizeAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapIncompleteSizeSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func OrderedMapIncompleteSizeSharedName(value string) OrderedMapIncompleteSizeAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op returns the number of incomplete elements in the underlying container. 
+func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapIncompleteSizeAttr) (size tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"component_types": component_types} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QueueDequeueV2", - Input: []tf.Input{ - handle, - }, + Type: "OrderedMapIncompleteSize", + Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueV2", err) - return - } - return components + return op.Output(0) } -// ParseSingleSequenceExampleAttr is an optional argument to ParseSingleSequenceExample. -type ParseSingleSequenceExampleAttr func(optionalAttr) +// RandomShuffleAttr is an optional argument to RandomShuffle. +type RandomShuffleAttr func(optionalAttr) -// ParseSingleSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value. -// -// value: A list of Ncontext_sparse types; the data types of data in -// each context Feature given in context_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> +// RandomShuffleSeed sets the optional seed attribute to value. // -// REQUIRES: len(value) >= 0 -func ParseSingleSequenceExampleContextSparseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. 
+// If not specified, defaults to 0 +func RandomShuffleSeed(value int64) RandomShuffleAttr { return func(m optionalAttr) { - m["context_sparse_types"] = value + m["seed"] = value } } -// ParseSingleSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. -// If not specified, defaults to <> +// RandomShuffleSeed2 sets the optional seed2 attribute to value. // -// REQUIRES: len(value) >= 0 -func ParseSingleSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr { +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomShuffleSeed2(value int64) RandomShuffleAttr { return func(m optionalAttr) { - m["feature_list_dense_types"] = value + m["seed2"] = value } } -// ParseSingleSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value. +// Randomly shuffles a tensor along its first dimension. // -// value: A list of Ncontext_dense shapes; the shapes of data in -// each context Feature given in context_dense_keys. -// The number of elements in the Feature corresponding to context_dense_key[j] -// must always equal context_dense_shapes[j].NumEntries(). -// The shape of context_dense_values[j] will match context_dense_shapes[j]. -// If not specified, defaults to <> +// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped +// to one and only one `output[i]`. For example, a mapping that might occur for a +// 3x2 tensor is: // -// REQUIRES: len(value) >= 0 -func ParseSingleSequenceExampleContextDenseShapes(value []tf.Shape) ParseSingleSequenceExampleAttr { - return func(m optionalAttr) { - m["context_dense_shapes"] = value +// ``` +// [[1, 2], [[5, 6], +// [3, 4], ==> [1, 2], +// [5, 6]] [3, 4]] +// ``` +// +// Arguments: +// value: The tensor to be shuffled. +// +// Returns A tensor of same shape and type as `value`, shuffled along its first +// dimension. 
+func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomShuffle", + Input: []tf.Input{ + value, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// ParseSingleSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. -// -// value: A list of Nfeature_list_sparse types; the data types -// of data in each FeatureList given in feature_list_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSingleSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr { +// FakeQuantWithMinMaxVarsPerChannelAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannel. +type FakeQuantWithMinMaxVarsPerChannelAttr func(optionalAttr) + +// FakeQuantWithMinMaxVarsPerChannelNumBits sets the optional num_bits attribute to value. +// If not specified, defaults to 8 +func FakeQuantWithMinMaxVarsPerChannelNumBits(value int64) FakeQuantWithMinMaxVarsPerChannelAttr { return func(m optionalAttr) { - m["feature_list_sparse_types"] = value + m["num_bits"] = value } } -// ParseSingleSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. -// -// value: A list of Nfeature_list_dense shapes; the shapes of -// data in each FeatureList given in feature_list_dense_keys. -// The shape of each Feature in the FeatureList corresponding to -// feature_list_dense_key[j] must always equal -// feature_list_dense_shapes[j].NumEntries(). 
-// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSingleSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSingleSequenceExampleAttr { +// FakeQuantWithMinMaxVarsPerChannelNarrowRange sets the optional narrow_range attribute to value. +// If not specified, defaults to false +func FakeQuantWithMinMaxVarsPerChannelNarrowRange(value bool) FakeQuantWithMinMaxVarsPerChannelAttr { return func(m optionalAttr) { - m["feature_list_dense_shapes"] = value + m["narrow_range"] = value } } -// Transforms a scalar brain.SequenceExample proto (as strings) into typed tensors. +// Fake-quantize the 'inputs' tensor of type float and one of the shapes: `[d]`, // -// Arguments: -// serialized: A scalar containing a binary serialized SequenceExample proto. -// feature_list_dense_missing_assumed_empty: A vector listing the -// FeatureList keys which may be missing from the SequenceExample. If the -// associated FeatureList is missing, it is treated as empty. By default, -// any FeatureList not listed in this vector must exist in the SequenceExample. -// context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars). -// The keys expected in the Examples' features associated with context_sparse -// values. -// context_dense_keys: A list of Ncontext_dense string Tensors (scalars). -// The keys expected in the SequenceExamples' context features associated with -// dense values. -// feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors -// (scalars). The keys expected in the FeatureLists associated with sparse +// `[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]` +// to 'outputs' tensor of same shape as `inputs`. +// +// `[min; max]` define the clamping range for the `inputs` data. 
+// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` +// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and +// then de-quantized and output as floats in `[min; max]` interval. +// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. +// +// This operation has a gradient and thus allows for training `min` and `max` // values. -// feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars). -// The keys expected in the SequenceExamples' feature_lists associated -// with lists of dense values. -// context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). -// context_dense_defaults[j] provides default values -// when the SequenceExample's context map lacks context_dense_key[j]. -// If an empty Tensor is provided for context_dense_defaults[j], -// then the Feature context_dense_keys[j] is required. -// The input type is inferred from context_dense_defaults[j], even when it's -// empty. If context_dense_defaults[j] is not empty, its shape must match -// context_dense_shapes[j]. -// debug_name: A scalar containing the name of the serialized proto. -// May contain, for example, table key (descriptive) name for the -// corresponding serialized proto. This is purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty scalar if no name is available. 
-func ParseSingleSequenceExample(scope *Scope, serialized tf.Output, feature_list_dense_missing_assumed_empty tf.Output, context_sparse_keys []tf.Output, context_dense_keys []tf.Output, feature_list_sparse_keys []tf.Output, feature_list_dense_keys []tf.Output, context_dense_defaults []tf.Output, debug_name tf.Output, optional ...ParseSingleSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output) { +func FakeQuantWithMinMaxVarsPerChannel(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsPerChannelAttr) (outputs tf.Output) { if scope.Err() != nil { return } @@ -9695,105 +9659,65 @@ func ParseSingleSequenceExample(scope *Scope, serialized tf.Output, feature_list a(attrs) } opspec := tf.OpSpec{ - Type: "ParseSingleSequenceExample", + Type: "FakeQuantWithMinMaxVarsPerChannel", Input: []tf.Input{ - serialized, feature_list_dense_missing_assumed_empty, tf.OutputList(context_sparse_keys), tf.OutputList(context_dense_keys), tf.OutputList(feature_list_sparse_keys), tf.OutputList(feature_list_dense_keys), tf.OutputList(context_dense_defaults), debug_name, + inputs, min, max, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { - 
scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values + return op.Output(0) } -// RandomGammaAttr is an optional argument to RandomGamma. -type RandomGammaAttr func(optionalAttr) +// TruncatedNormalAttr is an optional argument to TruncatedNormal. +type TruncatedNormalAttr func(optionalAttr) -// RandomGammaSeed sets the optional seed attribute to value. +// TruncatedNormalSeed sets the optional seed attribute to value. // // value: If either `seed` or `seed2` are set to be non-zero, the random number // generator is seeded by the given seed. Otherwise, it is seeded by a // random seed. // If not specified, defaults to 0 -func RandomGammaSeed(value int64) RandomGammaAttr { +func TruncatedNormalSeed(value int64) TruncatedNormalAttr { return func(m optionalAttr) { m["seed"] = value } } -// RandomGammaSeed2 sets the optional seed2 attribute to value. 
+// TruncatedNormalSeed2 sets the optional seed2 attribute to value. // // value: A second seed to avoid seed collision. // If not specified, defaults to 0 -func RandomGammaSeed2(value int64) RandomGammaAttr { +func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { return func(m optionalAttr) { m["seed2"] = value } } -// Outputs random values from the Gamma distribution(s) described by alpha. +// Outputs random values from a truncated normal distribution. // -// This op uses the algorithm by Marsaglia et al. to acquire samples via -// transformation-rejection from pairs of uniform and normal random variables. -// See http://dl.acm.org/citation.cfm?id=358414 +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. // // Arguments: -// shape: 1-D integer tensor. Shape of independent samples to draw from each -// distribution described by the shape parameters given in alpha. -// alpha: A tensor in which each scalar is a "shape" parameter describing the -// associated gamma distribution. +// shape: The shape of the output tensor. +// dtype: The type of the output. // -// Returns A tensor with shape `shape + shape(alpha)`. Each slice -// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for -// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. -func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { +// Returns A tensor of the specified shape filled with random truncated normal +// values. 
+func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "RandomGamma", + Type: "TruncatedNormal", Input: []tf.Input{ - shape, alpha, + shape, }, Attrs: attrs, } @@ -9801,106 +9725,163 @@ func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...Ran return op.Output(0) } -// OrderedMapIncompleteSizeAttr is an optional argument to OrderedMapIncompleteSize. -type OrderedMapIncompleteSizeAttr func(optionalAttr) +// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. +type ResourceApplyFtrlV2Attr func(optionalAttr) -// OrderedMapIncompleteSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. // -// REQUIRES: value >= 0 -func OrderedMapIncompleteSizeCapacity(value int64) OrderedMapIncompleteSizeAttr { +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { return func(m optionalAttr) { - m["capacity"] = value + m["use_locking"] = value } } -// OrderedMapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// Update '*var' according to the Ftrl-proximal scheme. 
// -// REQUIRES: value >= 0 -func OrderedMapIncompleteSizeMemoryLimit(value int64) OrderedMapIncompleteSizeAttr { +// grad_with_shrinkage = grad + 2 * l2_shrinkage * var +// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage +// linear += grad_with_shrinkage + +// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regulariation. Must be a scalar. +// l2: L2 shrinkage regulariation. Must be a scalar. +// +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. +func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyFtrlV2", + Input: []tf.Input{ + var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// SkipgramAttr is an optional argument to Skipgram. +type SkipgramAttr func(optionalAttr) + +// SkipgramWindowSize sets the optional window_size attribute to value. +// +// value: The number of words to predict to the left and right of the target. +// If not specified, defaults to 5 +func SkipgramWindowSize(value int64) SkipgramAttr { return func(m optionalAttr) { - m["memory_limit"] = value + m["window_size"] = value } } -// OrderedMapIncompleteSizeContainer sets the optional container attribute to value. 
-// If not specified, defaults to "" -func OrderedMapIncompleteSizeContainer(value string) OrderedMapIncompleteSizeAttr { +// SkipgramMinCount sets the optional min_count attribute to value. +// +// value: The minimum number of word occurrences for it to be included in the +// vocabulary. +// If not specified, defaults to 5 +func SkipgramMinCount(value int64) SkipgramAttr { return func(m optionalAttr) { - m["container"] = value + m["min_count"] = value } } -// OrderedMapIncompleteSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapIncompleteSizeSharedName(value string) OrderedMapIncompleteSizeAttr { +// SkipgramSubsample sets the optional subsample attribute to value. +// +// value: Threshold for word occurrence. Words that appear with higher +// frequency will be randomly down-sampled. Set to 0 to disable. +// If not specified, defaults to 0.001 +func SkipgramSubsample(value float32) SkipgramAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["subsample"] = value } } -// Op returns the number of incomplete elements in the underlying container. -func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapIncompleteSizeAttr) (size tf.Output) { +// Parses a text file and creates a batch of examples. +// +// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result +// +// Arguments: +// filename: The corpus's text file name. +// batch_size: The size of produced batch. +// +// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids. 
+func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "OrderedMapIncompleteSize", + Type: "Skipgram", Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) } -// RandomShuffleAttr is an optional argument to RandomShuffle. -type RandomShuffleAttr func(optionalAttr) +// ParameterizedTruncatedNormalAttr is an optional argument to ParameterizedTruncatedNormal. +type ParameterizedTruncatedNormalAttr func(optionalAttr) -// RandomShuffleSeed sets the optional seed attribute to value. +// ParameterizedTruncatedNormalSeed sets the optional seed attribute to value. // // value: If either `seed` or `seed2` are set to be non-zero, the random number // generator is seeded by the given seed. Otherwise, it is seeded by a // random seed. // If not specified, defaults to 0 -func RandomShuffleSeed(value int64) RandomShuffleAttr { +func ParameterizedTruncatedNormalSeed(value int64) ParameterizedTruncatedNormalAttr { return func(m optionalAttr) { m["seed"] = value } } -// RandomShuffleSeed2 sets the optional seed2 attribute to value. +// ParameterizedTruncatedNormalSeed2 sets the optional seed2 attribute to value. // // value: A second seed to avoid seed collision. 
// If not specified, defaults to 0 -func RandomShuffleSeed2(value int64) RandomShuffleAttr { +func ParameterizedTruncatedNormalSeed2(value int64) ParameterizedTruncatedNormalAttr { return func(m optionalAttr) { m["seed2"] = value } } -// Randomly shuffles a tensor along its first dimension. -// -// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped -// to one and only one `output[i]`. For example, a mapping that might occur for a -// 3x2 tensor is: +// Outputs random values from a normal distribution. The parameters may each be a // -// ``` -// [[1, 2], [[5, 6], -// [3, 4], ==> [1, 2], -// [5, 6]] [3, 4]] -// ``` +// scalar which applies to the entire output, or a vector of length shape[0] which +// stores the parameters for each batch. // // Arguments: -// value: The tensor to be shuffled. +// shape: The shape of the output tensor. Batches are indexed by the 0th dimension. +// means: The mean parameter of each batch. +// stdevs: The standard deviation parameter of each batch. Must be greater than 0. +// minvals: The minimum cutoff. May be -infinity. +// maxvals: The maximum cutoff. May be +infinity, and must be more than the minval +// for each batch. // -// Returns A tensor of same shape and type as `value`, shuffled along its first -// dimension. -func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { +// Returns A matrix of shape num_batches x samples_per_batch, filled with random +// truncated normal values using the parameters for each row. 
+func ParameterizedTruncatedNormal(scope *Scope, shape tf.Output, means tf.Output, stdevs tf.Output, minvals tf.Output, maxvals tf.Output, optional ...ParameterizedTruncatedNormalAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -9909,9 +9890,9 @@ func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "RandomShuffle", + Type: "ParameterizedTruncatedNormal", Input: []tf.Input{ - value, + shape, means, stdevs, minvals, maxvals, }, Attrs: attrs, } @@ -9919,39 +9900,48 @@ func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) return op.Output(0) } -// FakeQuantWithMinMaxVarsPerChannelAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannel. -type FakeQuantWithMinMaxVarsPerChannelAttr func(optionalAttr) +// RandomUniformIntAttr is an optional argument to RandomUniformInt. +type RandomUniformIntAttr func(optionalAttr) -// FakeQuantWithMinMaxVarsPerChannelNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsPerChannelNumBits(value int64) FakeQuantWithMinMaxVarsPerChannelAttr { +// RandomUniformIntSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomUniformIntSeed(value int64) RandomUniformIntAttr { return func(m optionalAttr) { - m["num_bits"] = value + m["seed"] = value } } -// FakeQuantWithMinMaxVarsPerChannelNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsPerChannelNarrowRange(value bool) FakeQuantWithMinMaxVarsPerChannelAttr { +// RandomUniformIntSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. 
+// If not specified, defaults to 0 +func RandomUniformIntSeed2(value int64) RandomUniformIntAttr { return func(m optionalAttr) { - m["narrow_range"] = value + m["seed2"] = value } } -// Fake-quantize the 'inputs' tensor of type float and one of the shapes: `[d]`, +// Outputs random integers from a uniform distribution. // -// `[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]` -// to 'outputs' tensor of same shape as `inputs`. +// The generated values are uniform integers in the range `[minval, maxval)`. +// The lower bound `minval` is included in the range, while the upper bound +// `maxval` is excluded. // -// `[min; max]` define the clamping range for the `inputs` data. -// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` -// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and -// then de-quantized and output as floats in `[min; max]` interval. -// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. +// The random integers are slightly biased unless `maxval - minval` is an exact +// power of two. The bias is small for values of `maxval - minval` significantly +// smaller than the range of the output (either `2^32` or `2^64`). // -// This operation has a gradient and thus allows for training `min` and `max` -// values. -func FakeQuantWithMinMaxVarsPerChannel(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsPerChannelAttr) (outputs tf.Output) { +// Arguments: +// shape: The shape of the output tensor. +// minval: 0-D. Inclusive lower bound on the generated integers. +// maxval: 0-D. Exclusive upper bound on the generated integers. +// +// Returns A tensor of the specified shape filled with uniform random integers. 
+func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -9960,9 +9950,9 @@ func FakeQuantWithMinMaxVarsPerChannel(scope *Scope, inputs tf.Output, min tf.Ou a(attrs) } opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVarsPerChannel", + Type: "RandomUniformInt", Input: []tf.Input{ - inputs, min, max, + shape, minval, maxval, }, Attrs: attrs, } @@ -9970,269 +9960,358 @@ func FakeQuantWithMinMaxVarsPerChannel(scope *Scope, inputs tf.Output, min tf.Ou return op.Output(0) } -// TruncatedNormalAttr is an optional argument to TruncatedNormal. -type TruncatedNormalAttr func(optionalAttr) - -// TruncatedNormalSeed sets the optional seed attribute to value. +// Convert JSON-encoded Example records to binary protocol buffer strings. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func TruncatedNormalSeed(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed"] = value +// This op translates a tensor containing Example records, encoded using +// the [standard JSON +// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json), +// into a tensor containing the same records encoded as binary protocol +// buffers. The resulting tensor can then be fed to any of the other +// Example-parsing ops. +// +// Arguments: +// json_examples: Each string is a JSON object serialized according to the JSON +// mapping of the Example proto. +// +// Returns Each string is a binary Example protocol buffer corresponding +// to the respective element of `json_examples`. 
+func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DecodeJSONExample", + Input: []tf.Input{ + json_examples, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// TruncatedNormalSeed2 sets the optional seed2 attribute to value. +// Adds sparse updates to the variable referenced by `resource`. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value +// This operation computes +// +// # Scalar indices +// ref[indices, ...] += updates[...] +// +// # Vector indices (for each i) +// ref[indices[i], ...] += updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions add. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]`. +// +//
+// +//
+// +// Arguments: +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. +// +// Returns the created operation. +func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ResourceScatterAdd", + Input: []tf.Input{ + resource, indices, updates, + }, } + return scope.AddOperation(opspec) } -// Outputs random values from a truncated normal distribution. +// Delete the TensorArray from its resource container. // -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. +// This enables the user to close and release the resource in the middle +// of a step/run. // // Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. +// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). // -// Returns A tensor of the specified shape filled with random truncated normal -// values. -func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { +// Returns the created operation. +func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "TruncatedNormal", + Type: "TensorArrayCloseV3", Input: []tf.Input{ - shape, + handle, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. 
-type ResourceApplyFtrlV2Attr func(optionalAttr) +// ResourceGatherAttr is an optional argument to ResourceGather. +type ResourceGatherAttr func(optionalAttr) -// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { +// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["validate_indices"] = value } } -// Update '*var' according to the Ftrl-proximal scheme. +// Gather slices from the variable pointed to by `resource` according to `indices`. // -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). +// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: // -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regulariation. Must be a scalar. -// l2: L2 shrinkage regulariation. Must be a scalar. +// ```python +// # Scalar indices +// output[:, ..., :] = params[indices, :, ... :] // -// lr_power: Scaling factor. Must be a scalar. 
+// # Vector indices +// output[i, :, ..., :] = params[indices[i], :, ... :] // -// Returns the created operation. -func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { +// # Higher rank indices +// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] +// ``` +func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyFtrlV2", + Type: "ResourceGather", Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, + resource, indices, }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// SkipgramAttr is an optional argument to Skipgram. -type SkipgramAttr func(optionalAttr) - -// SkipgramWindowSize sets the optional window_size attribute to value. -// -// value: The number of words to predict to the left and right of the target. -// If not specified, defaults to 5 -func SkipgramWindowSize(value int64) SkipgramAttr { - return func(m optionalAttr) { - m["window_size"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0) } -// SkipgramMinCount sets the optional min_count attribute to value. -// -// value: The minimum number of word occurrences for it to be included in the -// vocabulary. -// If not specified, defaults to 5 -func SkipgramMinCount(value int64) SkipgramAttr { - return func(m optionalAttr) { - m["min_count"] = value - } -} +// QuantizedConv2DAttr is an optional argument to QuantizedConv2D. +type QuantizedConv2DAttr func(optionalAttr) -// SkipgramSubsample sets the optional subsample attribute to value. 
-// -// value: Threshold for word occurrence. Words that appear with higher -// frequency will be randomly down-sampled. Set to 0 to disable. -// If not specified, defaults to 0.001 -func SkipgramSubsample(value float32) SkipgramAttr { +// QuantizedConv2DOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { return func(m optionalAttr) { - m["subsample"] = value + m["out_type"] = value } } -// Parses a text file and creates a batch of examples. +// Computes a 2D convolution given quantized 4D input and filter tensors. // -// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result +// The inputs are quantized tensors where the lowest value represents the real +// number of the associated minimum, and the highest represents the maximum. +// This means that you can only interpret the quantized output in the same way, by +// taking the returned minimum and maximum values into account. // // Arguments: -// filename: The corpus's text file name. -// batch_size: The size of produced batch. // -// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids. -func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { +// filter: filter's input_depth dimension must match input's depth dimensions. +// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. 
+// min_filter: The float value that the lowest quantized filter value represents. +// max_filter: The float value that the highest quantized filter value represents. +// strides: The stride of the sliding window for each dimension of the input +// tensor. +// padding: The type of padding algorithm to use. +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Skipgram", - + Type: "QuantizedConv2D", + Input: []tf.Input{ + input, filter, min_input, max_input, min_filter, max_filter, + }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) + return op.Output(0), op.Output(1), op.Output(2) } -// ParameterizedTruncatedNormalAttr is an optional argument to ParameterizedTruncatedNormal. -type ParameterizedTruncatedNormalAttr func(optionalAttr) +// QueueDequeueV2Attr is an optional argument to QueueDequeueV2. +type QueueDequeueV2Attr func(optionalAttr) -// ParameterizedTruncatedNormalSeed sets the optional seed attribute to value. +// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. 
-// If not specified, defaults to 0 -func ParameterizedTruncatedNormalSeed(value int64) ParameterizedTruncatedNormalAttr { +// value: If the queue is empty, this operation will block for up to +// timeout_ms milliseconds. +// Note: This option is not supported yet. +// If not specified, defaults to -1 +func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr { return func(m optionalAttr) { - m["seed"] = value + m["timeout_ms"] = value } } -// ParameterizedTruncatedNormalSeed2 sets the optional seed2 attribute to value. +// Dequeues a tuple of one or more tensors from the given queue. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func ParameterizedTruncatedNormalSeed2(value int64) ParameterizedTruncatedNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from a normal distribution. The parameters may each be a +// This operation has k outputs, where k is the number of components +// in the tuples stored in the given queue, and output i is the ith +// component of the dequeued tuple. // -// scalar which applies to the entire output, or a vector of length shape[0] which -// stores the parameters for each batch. +// N.B. If the queue is empty, this operation will block until an element +// has been dequeued (or 'timeout_ms' elapses, if specified). // // Arguments: -// shape: The shape of the output tensor. Batches are indexed by the 0th dimension. -// means: The mean parameter of each batch. -// stdevs: The standard deviation parameter of each batch. Must be greater than 0. -// minvals: The minimum cutoff. May be -infinity. -// maxvals: The maximum cutoff. May be +infinity, and must be more than the minval -// for each batch. +// handle: The handle to a queue. +// component_types: The type of each component in a tuple. // -// Returns A matrix of shape num_batches x samples_per_batch, filled with random -// truncated normal values using the parameters for each row. 
-func ParameterizedTruncatedNormal(scope *Scope, shape tf.Output, means tf.Output, stdevs tf.Output, minvals tf.Output, maxvals tf.Output, optional ...ParameterizedTruncatedNormalAttr) (output tf.Output) { +// Returns One or more tensors that were dequeued as a tuple. +func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"component_types": component_types} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ParameterizedTruncatedNormal", + Type: "QueueDequeueV2", Input: []tf.Input{ - shape, means, stdevs, minvals, maxvals, + handle, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("QueueDequeueV2", err) + return + } + return components } -// EncodePngAttr is an optional argument to EncodePng. -type EncodePngAttr func(optionalAttr) +// ParseSingleSequenceExampleAttr is an optional argument to ParseSingleSequenceExample. +type ParseSingleSequenceExampleAttr func(optionalAttr) -// EncodePngCompression sets the optional compression attribute to value. +// ParseSingleSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value. // -// value: Compression level. -// If not specified, defaults to -1 -func EncodePngCompression(value int64) EncodePngAttr { +// value: A list of Ncontext_sparse types; the data types of data in +// each context Feature given in context_sparse_keys. +// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). 
+// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSingleSequenceExampleContextSparseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr { return func(m optionalAttr) { - m["compression"] = value + m["context_sparse_types"] = value } } -// PNG-encode an image. +// ParseSingleSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. +// If not specified, defaults to <> // -// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` -// where `channels` is: +// REQUIRES: len(value) >= 0 +func ParseSingleSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr { + return func(m optionalAttr) { + m["feature_list_dense_types"] = value + } +} + +// ParseSingleSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value. // -// * 1: for grayscale. -// * 2: for grayscale + alpha. -// * 3: for RGB. -// * 4: for RGBA. +// value: A list of Ncontext_dense shapes; the shapes of data in +// each context Feature given in context_dense_keys. +// The number of elements in the Feature corresponding to context_dense_key[j] +// must always equal context_dense_shapes[j].NumEntries(). +// The shape of context_dense_values[j] will match context_dense_shapes[j]. +// If not specified, defaults to <> // -// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder -// default or a value from 0 to 9. 9 is the highest compression level, generating -// the smallest output, but is slower. +// REQUIRES: len(value) >= 0 +func ParseSingleSequenceExampleContextDenseShapes(value []tf.Shape) ParseSingleSequenceExampleAttr { + return func(m optionalAttr) { + m["context_dense_shapes"] = value + } +} + +// ParseSingleSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. // -// Arguments: -// image: 3-D with shape `[height, width, channels]`. 
+// value: A list of Nfeature_list_sparse types; the data types +// of data in each FeatureList given in feature_list_sparse_keys. +// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// If not specified, defaults to <> // -// Returns 0-D. PNG-encoded image. -func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) { +// REQUIRES: len(value) >= 0 +func ParseSingleSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr { + return func(m optionalAttr) { + m["feature_list_sparse_types"] = value + } +} + +// ParseSingleSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. +// +// value: A list of Nfeature_list_dense shapes; the shapes of +// data in each FeatureList given in feature_list_dense_keys. +// The shape of each Feature in the FeatureList corresponding to +// feature_list_dense_key[j] must always equal +// feature_list_dense_shapes[j].NumEntries(). +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSingleSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSingleSequenceExampleAttr { + return func(m optionalAttr) { + m["feature_list_dense_shapes"] = value + } +} + +// Transforms a scalar brain.SequenceExample proto (as strings) into typed tensors. +// +// Arguments: +// serialized: A scalar containing a binary serialized SequenceExample proto. +// feature_list_dense_missing_assumed_empty: A vector listing the +// FeatureList keys which may be missing from the SequenceExample. If the +// associated FeatureList is missing, it is treated as empty. By default, +// any FeatureList not listed in this vector must exist in the SequenceExample. +// context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars). +// The keys expected in the Examples' features associated with context_sparse +// values. 
+// context_dense_keys: A list of Ncontext_dense string Tensors (scalars). +// The keys expected in the SequenceExamples' context features associated with +// dense values. +// feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors +// (scalars). The keys expected in the FeatureLists associated with sparse +// values. +// feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars). +// The keys expected in the SequenceExamples' feature_lists associated +// with lists of dense values. +// context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). +// context_dense_defaults[j] provides default values +// when the SequenceExample's context map lacks context_dense_key[j]. +// If an empty Tensor is provided for context_dense_defaults[j], +// then the Feature context_dense_keys[j] is required. +// The input type is inferred from context_dense_defaults[j], even when it's +// empty. If context_dense_defaults[j] is not empty, its shape must match +// context_dense_shapes[j]. +// debug_name: A scalar containing the name of the serialized proto. +// May contain, for example, table key (descriptive) name for the +// corresponding serialized proto. This is purely useful for debugging +// purposes, and the presence of values here has no effect on the output. +// May also be an empty scalar if no name is available. 
+func ParseSingleSequenceExample(scope *Scope, serialized tf.Output, feature_list_dense_missing_assumed_empty tf.Output, context_sparse_keys []tf.Output, context_dense_keys []tf.Output, feature_list_sparse_keys []tf.Output, feature_list_dense_keys []tf.Output, context_dense_defaults []tf.Output, debug_name tf.Output, optional ...ParseSingleSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output) { if scope.Err() != nil { return } @@ -10241,58 +10320,94 @@ func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (conten a(attrs) } opspec := tf.OpSpec{ - Type: "EncodePng", + Type: "ParseSingleSequenceExample", Input: []tf.Input{ - image, + serialized, feature_list_dense_missing_assumed_empty, tf.OutputList(context_sparse_keys), tf.OutputList(context_dense_keys), tf.OutputList(feature_list_sparse_keys), tf.OutputList(feature_list_dense_keys), tf.OutputList(context_dense_defaults), debug_name, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { + scope.UpdateErr("ParseSingleSequenceExample", err) + return + } + if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { + scope.UpdateErr("ParseSingleSequenceExample", err) + return + } + if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { + scope.UpdateErr("ParseSingleSequenceExample", err) + return + } + if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { + scope.UpdateErr("ParseSingleSequenceExample", err) + return + } 
+ if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { + scope.UpdateErr("ParseSingleSequenceExample", err) + return + } + if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil { + scope.UpdateErr("ParseSingleSequenceExample", err) + return + } + if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { + scope.UpdateErr("ParseSingleSequenceExample", err) + return + } + if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil { + scope.UpdateErr("ParseSingleSequenceExample", err) + return + } + return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values } -// RandomUniformIntAttr is an optional argument to RandomUniformInt. -type RandomUniformIntAttr func(optionalAttr) +// RandomGammaAttr is an optional argument to RandomGamma. +type RandomGammaAttr func(optionalAttr) -// RandomUniformIntSeed sets the optional seed attribute to value. +// RandomGammaSeed sets the optional seed attribute to value. // // value: If either `seed` or `seed2` are set to be non-zero, the random number // generator is seeded by the given seed. Otherwise, it is seeded by a // random seed. // If not specified, defaults to 0 -func RandomUniformIntSeed(value int64) RandomUniformIntAttr { +func RandomGammaSeed(value int64) RandomGammaAttr { return func(m optionalAttr) { m["seed"] = value } } -// RandomUniformIntSeed2 sets the optional seed2 attribute to value. +// RandomGammaSeed2 sets the optional seed2 attribute to value. // // value: A second seed to avoid seed collision. 
// If not specified, defaults to 0 -func RandomUniformIntSeed2(value int64) RandomUniformIntAttr { +func RandomGammaSeed2(value int64) RandomGammaAttr { return func(m optionalAttr) { m["seed2"] = value } } -// Outputs random integers from a uniform distribution. -// -// The generated values are uniform integers in the range `[minval, maxval)`. -// The lower bound `minval` is included in the range, while the upper bound -// `maxval` is excluded. +// Outputs random values from the Gamma distribution(s) described by alpha. // -// The random integers are slightly biased unless `maxval - minval` is an exact -// power of two. The bias is small for values of `maxval - minval` significantly -// smaller than the range of the output (either `2^32` or `2^64`). +// This op uses the algorithm by Marsaglia et al. to acquire samples via +// transformation-rejection from pairs of uniform and normal random variables. +// See http://dl.acm.org/citation.cfm?id=358414 // // Arguments: -// shape: The shape of the output tensor. -// minval: 0-D. Inclusive lower bound on the generated integers. -// maxval: 0-D. Exclusive upper bound on the generated integers. +// shape: 1-D integer tensor. Shape of independent samples to draw from each +// distribution described by the shape parameters given in alpha. +// alpha: A tensor in which each scalar is a "shape" parameter describing the +// associated gamma distribution. // -// Returns A tensor of the specified shape filled with uniform random integers. -func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { +// Returns A tensor with shape `shape + shape(alpha)`. Each slice +// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for +// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. 
+func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -10301,9 +10416,150 @@ func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf a(attrs) } opspec := tf.OpSpec{ - Type: "RandomUniformInt", + Type: "RandomGamma", Input: []tf.Input{ - shape, minval, maxval, + shape, alpha, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the gradient for the inverse of `x` wrt its input. +// +// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` +// is the corresponding input gradient. +func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReciprocalGrad", + Input: []tf.Input{ + y, dy, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset with a range of values. Corresponds to python's xrange. +// +// Arguments: +// start: corresponds to start in python's xrange(). +// stop: corresponds to stop in python's xrange(). +// step: corresponds to step in python's xrange(). +// +// +func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "RangeDataset", + Input: []tf.Input{ + start, stop, step, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Saves tensors in V2 checkpoint format. +// +// By default, saves the named tensors in full. If the caller wishes to save +// specific slices of full tensors, "shape_and_slices" should be non-empty strings +// and correspondingly well-formed. +// +// Arguments: +// prefix: Must have a single element. 
The prefix of the V2 checkpoint to which we +// write the tensors. +// tensor_names: shape {N}. The names of the tensors to be saved. +// shape_and_slices: shape {N}. The slice specs of the tensors to be saved. +// Empty strings indicate that they are non-partitioned tensors. +// tensors: `N` tensors to save. +// +// Returns the created operation. +func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SaveV2", + Input: []tf.Input{ + prefix, tensor_names, shape_and_slices, tf.OutputList(tensors), + }, + } + return scope.AddOperation(opspec) +} + +// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve. +type MatrixTriangularSolveAttr func(optionalAttr) + +// MatrixTriangularSolveLower sets the optional lower attribute to value. +// +// value: Boolean indicating whether the innermost matrices in `matrix` are +// lower or upper triangular. +// If not specified, defaults to true +func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr { + return func(m optionalAttr) { + m["lower"] = value + } +} + +// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value. +// +// value: Boolean indicating whether to solve with `matrix` or its (block-wise) +// adjoint. +// +// @compatibility(numpy) +// Equivalent to np.linalg.triangular_solve +// @end_compatibility +// If not specified, defaults to false +func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr { + return func(m optionalAttr) { + m["adjoint"] = value + } +} + +// Solves systems of linear equations with upper or lower triangular matrices by +// +// backsubstitution. +// +// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form +// square matrices. If `lower` is `True` then the strictly upper triangular part +// of each inner-most matrix is assumed to be zero and not accessed. 
+// If `lower` is False then the strictly lower triangular part of each inner-most +// matrix is assumed to be zero and not accessed. +// `rhs` is a tensor of shape `[..., M, K]`. +// +// The output is a tensor of shape `[..., M, K]`. If `adjoint` is +// `True` then the innermost matrices in `output` satisfy matrix equations +// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +// If `adjoint` is `False` then the strictly then the innermost matrices in +// `output` satisfy matrix equations +// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`. +// +// Arguments: +// matrix: Shape is `[..., M, M]`. +// rhs: Shape is `[..., M, K]`. +// +// Returns Shape is `[..., M, K]`. +func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MatrixTriangularSolve", + Input: []tf.Input{ + matrix, rhs, }, Attrs: attrs, } @@ -10395,90 +10651,6 @@ func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatM return op.Output(0) } -// MaxPoolV2Attr is an optional argument to MaxPoolV2. -type MaxPoolV2Attr func(optionalAttr) - -// MaxPoolV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolV2DataFormat(value string) MaxPoolV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs max pooling on the input. -// -// Arguments: -// input: 4-D input to pool over. 
-// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns The max pooled output tensor. -func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolV2", - Input: []tf.Input{ - input, ksize, strides, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Batch normalization. -// -// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() -// -// This op is deprecated. Prefer `tf.nn.batch_normalization`. -// -// Arguments: -// t: A 4D input Tensor. -// m: A 1D mean Tensor with size matching the last dimension of t. -// This is the first output from tf.nn.moments, -// or a saved moving average thereof. -// v: A 1D variance Tensor with size matching the last dimension of t. -// This is the second output from tf.nn.moments, -// or a saved moving average thereof. -// beta: A 1D beta Tensor with size matching the last dimension of t. -// An offset to be added to the normalized tensor. -// gamma: A 1D gamma Tensor with size matching the last dimension of t. -// If "scale_after_normalization" is true, this tensor will be multiplied -// with the normalized tensor. -// variance_epsilon: A small float number to avoid dividing by 0. -// scale_after_normalization: A bool indicating whether the resulted tensor -// needs to be multiplied with gamma. 
-func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} - opspec := tf.OpSpec{ - Type: "BatchNormWithGlobalNormalization", - Input: []tf.Input{ - t, m, v, beta, gamma, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // SdcaOptimizerAttr is an optional argument to SdcaOptimizer. type SdcaOptimizerAttr func(optionalAttr) @@ -10963,17 +11135,62 @@ func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_ return op.Output(0), op.Output(1), op.Output(2) } -// Computes the sign and the log of the absolute value of the determinant of +// SumAttr is an optional argument to Sum. +type SumAttr func(optionalAttr) + +// SumKeepDims sets the optional keep_dims attribute to value. // -// one or more square matrices. +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func SumKeepDims(value bool) SumAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the sum of elements across dimensions of a tensor. // -// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions -// form square matrices. The outputs are two tensors containing the signs and -// absolute values of the log determinants for all N input submatrices -// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). -// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU -// is the LU decomposition of the input and P is the corresponding -// permutation matrix. +// Reduces `input` along the dimensions given in `reduction_indices`. 
Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `reduction_indices`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. +// +// Arguments: +// input: The tensor to reduce. +// reduction_indices: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. +// +// Returns The reduced tensor. +func Sum(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...SumAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Sum", + Input: []tf.Input{ + input, reduction_indices, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the sign and the log of the absolute value of the determinant of +// +// one or more square matrices. +// +// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions +// form square matrices. The outputs are two tensors containing the signs and +// absolute values of the log determinants for all N input submatrices +// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). +// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU +// is the LU decomposition of the input and P is the corresponding +// permutation matrix. // // Arguments: // input: Shape is `[N, M, M]`. @@ -11071,6 +11288,29 @@ func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_val return op.Output(0), op.Output(1) } +// Assigns a new value to a variable. +// +// Any ReadVariableOp with a control dependency on this op is guaranteed to return +// this value or a subsequent newer value of the variable. +// +// Arguments: +// resource: handle to the resource in which to store the variable. +// value: the value to set the new tensor to use. +// +// Returns the created operation. 
+func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "AssignVariableOp", + Input: []tf.Input{ + resource, value, + }, + } + return scope.AddOperation(opspec) +} + // Says whether the targets are in the top `K` predictions. // // This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the @@ -12763,6 +13003,90 @@ func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Outp return op.Output(0) } +// Batch normalization. +// +// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() +// +// This op is deprecated. Prefer `tf.nn.batch_normalization`. +// +// Arguments: +// t: A 4D input Tensor. +// m: A 1D mean Tensor with size matching the last dimension of t. +// This is the first output from tf.nn.moments, +// or a saved moving average thereof. +// v: A 1D variance Tensor with size matching the last dimension of t. +// This is the second output from tf.nn.moments, +// or a saved moving average thereof. +// beta: A 1D beta Tensor with size matching the last dimension of t. +// An offset to be added to the normalized tensor. +// gamma: A 1D gamma Tensor with size matching the last dimension of t. +// If "scale_after_normalization" is true, this tensor will be multiplied +// with the normalized tensor. +// variance_epsilon: A small float number to avoid dividing by 0. +// scale_after_normalization: A bool indicating whether the resulted tensor +// needs to be multiplied with gamma. 
+func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} + opspec := tf.OpSpec{ + Type: "BatchNormWithGlobalNormalization", + Input: []tf.Input{ + t, m, v, beta, gamma, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// MaxPoolV2Attr is an optional argument to MaxPoolV2. +type MaxPoolV2Attr func(optionalAttr) + +// MaxPoolV2DataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolV2DataFormat(value string) MaxPoolV2Attr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Performs max pooling on the input. +// +// Arguments: +// input: 4-D input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns The max pooled output tensor. 
+func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPoolV2", + Input: []tf.Input{ + input, ksize, strides, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey. type OrderedMapUnstageNoKeyAttr func(optionalAttr) @@ -12835,52 +13159,6 @@ func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataTyp return key, values } -// MaxPoolAttr is an optional argument to MaxPool. -type MaxPoolAttr func(optionalAttr) - -// MaxPoolDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolDataFormat(value string) MaxPoolAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs max pooling on the input. -// -// Arguments: -// input: 4-D input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns The max pooled output tensor. 
-func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPool", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Merges summaries. // // This op creates a @@ -13867,24 +14145,6 @@ func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max return op.Output(0), op.Output(1), op.Output(2) } -// Computes the gradient for the inverse of `x` wrt its input. -// -// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` -// is the corresponding input gradient. -func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReciprocalGrad", - Input: []tf.Input{ - y, dy, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Reverses specific dimensions of a tensor. // // NOTE `tf.reverse` has now changed behavior in preparation for 1.0. @@ -14077,35 +14337,6 @@ func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, def return op.Output(0) } -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// input: The original input. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the -// input of `max_pool`. -// argmax: The indices of the maximum values chosen for each output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients of gradients w.r.t. the input of `max_pool`. 
-func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "MaxPoolGradGradWithArgmax", - Input: []tf.Input{ - input, grad, argmax, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput. type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr) @@ -14535,84 +14766,7 @@ func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumul return scope.AddOperation(opspec) } -// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. -type MaxPoolWithArgmaxAttr func(optionalAttr) - -// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. -// If not specified, defaults to DT_INT64 -func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { - return func(m optionalAttr) { - m["Targmax"] = value - } -} - -// Performs max pooling on the input and outputs both max values and indices. -// -// The indices in `argmax` are flattened, so that a maximum value at position -// `[b, y, x, c]` becomes flattened index -// `((b * height + y) * width + x) * channels + c`. -// -// The indices returned are always in `[0, height) x [0, width)` before flattening, -// even if padding is involved and the mathematically correct answer is outside -// (either negative or too large). This is a bug, but fixing it is difficult to do -// in a safe backwards compatible way, especially due to flattening. -// -// Arguments: -// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. -// ksize: The size of the window for each dimension of the input tensor. 
-// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. -func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolWithArgmax", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Fast Fourier transform. -// -// Computes the 1-dimensional discrete Fourier transform over the inner-most -// dimension of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.fft -// @end_compatibility -func FFT(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "FFT", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes softmax cross entropy cost and gradients to backpropagate. +// Computes softmax cross entropy cost and gradients to backpropagate. // // Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept // a matrix of label probabilities, but rather a single label per row @@ -14990,6 +15144,46 @@ func Conv2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, pa return op.Output(0) } +// VariableShapeAttr is an optional argument to VariableShape. 
+type VariableShapeAttr func(optionalAttr) + +// VariableShapeOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_INT32 +func VariableShapeOutType(value tf.DataType) VariableShapeAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Returns the shape of the variable pointed to by `resource`. +// +// This operation returns a 1-D integer tensor representing the shape of `input`. +// +// For example: +// +// ``` +// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] +// shape(t) ==> [2, 2, 3] +// ``` +func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "VariableShape", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // StringJoinAttr is an optional argument to StringJoin. type StringJoinAttr func(optionalAttr) @@ -15600,132 +15794,6 @@ func TruncateDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } -// MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2. -type MaxPoolGradGradV2Attr func(optionalAttr) - -// MaxPoolGradGradV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradGradV2DataFormat(value string) MaxPoolGradGradV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. 
-// orig_output: The original output tensor. -// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients of gradients w.r.t. the input to `max_pool`. -func MaxPoolGradGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradGradV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradGradV2", - Input: []tf.Input{ - orig_input, orig_output, grad, ksize, strides, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adjust the saturation of one or more images. -// -// `images` is a tensor of at least 3 dimensions. The last dimension is -// interpretted as channels, and must be three. -// -// The input image is considered in the RGB colorspace. Conceptually, the RGB -// colors are first mapped into HSV. A scale is then applied all the saturation -// values, and then remapped back to RGB colorspace. -// -// Arguments: -// images: Images to adjust. At least 3-D. -// scale: A float scale to add to the saturation. -// -// Returns The hue-adjusted image or images. -func AdjustSaturation(scope *Scope, images tf.Output, scale tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AdjustSaturation", - Input: []tf.Input{ - images, scale, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2. -type SelfAdjointEigV2Attr func(optionalAttr) - -// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value. 
-// -// value: If `True` then eigenvectors will be computed and returned in `v`. -// Otherwise, only the eigenvalues will be computed. -// If not specified, defaults to true -func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr { - return func(m optionalAttr) { - m["compute_v"] = value - } -} - -// Computes the eigen decomposition of one or more square self-adjoint matrices. -// -// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in -// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. -// -// ```python -// # a is a tensor. -// # e is a tensor of eigenvalues. -// # v is a tensor of eigenvectors. -// e, v = self_adjoint_eig(a) -// e = self_adjoint_eig(a, compute_v=False) -// ``` -// -// Arguments: -// input: `Tensor` input of shape `[N, N]`. -// -// Returns Eigenvalues. Shape is `[N]`.Eigenvectors. Shape is `[N, N]`. -func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SelfAdjointEigV2", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - // SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox. type SampleDistortedBoundingBoxAttr func(optionalAttr) @@ -16519,28 +16587,74 @@ func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) ( return op.Output(0) } -// Output a fact about factorials. -func Fact(scope *Scope) (fact tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Fact", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform. -type StatelessRandomUniformAttr func(optionalAttr) +// VarHandleOpAttr is an optional argument to VarHandleOp. 
+type VarHandleOpAttr func(optionalAttr) -// StatelessRandomUniformDtype sets the optional dtype attribute to value. +// VarHandleOpContainer sets the optional container attribute to value. // -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr { +// value: the container this variable is placed in. +// If not specified, defaults to "" +func VarHandleOpContainer(value string) VarHandleOpAttr { return func(m optionalAttr) { - m["dtype"] = value + m["container"] = value + } +} + +// VarHandleOpSharedName sets the optional shared_name attribute to value. +// +// value: the name by which this variable is referred to. +// If not specified, defaults to "" +func VarHandleOpSharedName(value string) VarHandleOpAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a handle to a Variable resource. +// +// Arguments: +// dtype: the type of this variable. Must agree with the dtypes +// of all ops using this variable. +// shape: The (possibly partially specified) shape of this variable. +func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype, "shape": shape} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "VarHandleOp", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Output a fact about factorials. +func Fact(scope *Scope) (fact tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Fact", + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform. +type StatelessRandomUniformAttr func(optionalAttr) + +// StatelessRandomUniformDtype sets the optional dtype attribute to value. 
+// +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr { + return func(m optionalAttr) { + m["dtype"] = value } } @@ -17121,129 +17235,6 @@ func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, l return scope.AddOperation(opspec) } -// LRNGradAttr is an optional argument to LRNGrad. -type LRNGradAttr func(optionalAttr) - -// LRNGradDepthRadius sets the optional depth_radius attribute to value. -// -// value: A depth radius. -// If not specified, defaults to 5 -func LRNGradDepthRadius(value int64) LRNGradAttr { - return func(m optionalAttr) { - m["depth_radius"] = value - } -} - -// LRNGradBias sets the optional bias attribute to value. -// -// value: An offset (usually > 0 to avoid dividing by 0). -// If not specified, defaults to 1 -func LRNGradBias(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["bias"] = value - } -} - -// LRNGradAlpha sets the optional alpha attribute to value. -// -// value: A scale factor, usually positive. -// If not specified, defaults to 1 -func LRNGradAlpha(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["alpha"] = value - } -} - -// LRNGradBeta sets the optional beta attribute to value. -// -// value: An exponent. -// If not specified, defaults to 0.5 -func LRNGradBeta(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["beta"] = value - } -} - -// Gradients for Local Response Normalization. -// -// Arguments: -// input_grads: 4-D with shape `[batch, height, width, channels]`. -// input_image: 4-D with shape `[batch, height, width, channels]`. -// output_image: 4-D with shape `[batch, height, width, channels]`. -// -// Returns The gradients for LRN. 
-func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LRNGrad", - Input: []tf.Input{ - input_grads, input_image, output_image, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StringToNumberAttr is an optional argument to StringToNumber. -type StringToNumberAttr func(optionalAttr) - -// StringToNumberOutType sets the optional out_type attribute to value. -// -// value: The numeric type to interpret each string in `string_tensor` as. -// If not specified, defaults to DT_FLOAT -func StringToNumberOutType(value tf.DataType) StringToNumberAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Converts each string in the input Tensor to the specified numeric type. -// -// (Note that int32 overflow results in an error while float overflow -// results in a rounded value.) -// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringToNumber", - Input: []tf.Input{ - string_tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the truth value of NOT x element-wise. -func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogicalNot", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // AvgPool3DGradAttr is an optional argument to AvgPool3DGrad. 
type AvgPool3DGradAttr func(optionalAttr) @@ -17292,6 +17283,34 @@ func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksi return op.Output(0) } +// Inverse fast Fourier transform. +// +// Computes the inverse 1-dimensional discrete Fourier transform over the +// inner-most dimension of `input`. +// +// Arguments: +// input: A complex64 tensor. +// +// Returns A complex64 tensor of the same shape as `input`. The inner-most +// dimension of `input` is replaced with its inverse 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifft +// @end_compatibility +func IFFT(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IFFT", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Creates a dataset that batches `batch_size` elements from `input_dataset`. // // Arguments: @@ -18381,34 +18400,6 @@ func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, option return op.Output(0) } -// Inverse fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform over the -// inner-most dimension of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its inverse 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifft -// @end_compatibility -func IFFT(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IFFT", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes the sum along sparse segments of a tensor divided by the sqrt of N. // // N is the size of the segment being reduced. 
@@ -18785,122 +18776,17 @@ func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segm return op.Output(0) } -// Converts one or more images from RGB to HSV. +// Applies sparse addition to `input` using individual values or slices // -// Outputs a tensor of the same shape as the `images` tensor, containing the HSV -// value of the pixels. The output is only well defined if the value in `images` -// are in `[0,1]`. +// from `updates` according to indices `indices`. The updates are non-aliasing: +// `input` is only modified in-place if no other operations will use it. +// Otherwise, a copy of `input` is made. This operation has a gradient with +// respect to both `input` and `updates`. // -// `output[..., 0]` contains hue, `output[..., 1]` contains saturation, and -// `output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0 -// corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue. +// `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. // -// Arguments: -// images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3. -// -// Returns `images` converted to HSV. -func RGBToHSV(scope *Scope, images tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RGBToHSV", - Input: []tf.Input{ - images, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. -type MatrixSolveLsAttr func(optionalAttr) - -// MatrixSolveLsFast sets the optional fast attribute to value. -// If not specified, defaults to true -func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { - return func(m optionalAttr) { - m["fast"] = value - } -} - -// Solves one or more linear least-squares problems. -// -// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form real or complex matrices of size `[M, N]`. 
`Rhs` is a tensor of the same -// type as `matrix` and shape `[..., M, K]`. -// The output is a tensor shape `[..., N, K]` where each output matrix solves -// each of the equations -// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` -// in the least squares sense. -// -// We use the following notation for (complex) matrix and right-hand sides -// in the batch: -// -// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), -// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), -// `output`=\\(X \in \mathbb{C}^{n \times k}\\), -// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). -// -// If `fast` is `True`, then the solution is computed by solving the normal -// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then -// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares -// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + -// \lambda ||Z||_F^2\\). If \\(m \lt n\\) then `output` is computed as -// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the -// minimum-norm solution to the under-determined linear system, i.e. -// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), -// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable -// when \\(A\\) is numerically full rank and has a condition number -// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or\\(\lambda\\) is -// sufficiently large. -// -// If `fast` is `False` an algorithm based on the numerically robust complete -// orthogonal decomposition is used. This computes the minimum-norm -// least-squares solution, even when \\(A\\) is rank deficient. This path is -// typically 6-7 times slower than the fast path. If `fast` is `False` then -// `l2_regularizer` is ignored. -// -// Arguments: -// matrix: Shape is `[..., M, N]`. -// rhs: Shape is `[..., M, K]`. -// l2_regularizer: Scalar tensor. 
-// -// @compatibility(numpy) -// Equivalent to np.linalg.lstsq -// @end_compatibility -// -// Returns Shape is `[..., N, K]`. -func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixSolveLs", - Input: []tf.Input{ - matrix, rhs, l2_regularizer, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Applies sparse addition to `input` using individual values or slices -// -// from `updates` according to indices `indices`. The updates are non-aliasing: -// `input` is only modified in-place if no other operations will use it. -// Otherwise, a copy of `input` is made. This operation has a gradient with -// respect to both `input` and `updates`. -// -// `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `input`. -// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. +// `indices` must be integer tensor, containing indices into `input`. +// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. // // The innermost dimension of `indices` (with length `K`) corresponds to // indices into elements (if `K = P`) or `(P-K)`-dimensional slices @@ -19191,185 +19077,502 @@ func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, opt return op.Output(0) } -// SvdAttr is an optional argument to Svd. -type SvdAttr func(optionalAttr) +// Computes element-wise population count (a.k.a. popcount, bitsum, bitcount). +// +// For each entry in `x`, calculates the number of `1` (on) bits in the binary +// representation of that entry. 
+// +// **NOTE**: It is more efficient to first `tf.bitcast` your tensors into +// `int32` or `int64` and perform the bitcount on the result, than to feed in +// 8- or 16-bit inputs and then aggregate the resulting counts. +func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "PopulationCount", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// SvdComputeUv sets the optional compute_uv attribute to value. +// AssertAttr is an optional argument to Assert. +type AssertAttr func(optionalAttr) + +// AssertSummarize sets the optional summarize attribute to value. // -// value: If true, left and right singular vectors will be -// computed and returned in `u` and `v`, respectively. -// If false, `u` and `v` are not set and should never referenced. -// If not specified, defaults to true -func SvdComputeUv(value bool) SvdAttr { +// value: Print this many entries of each tensor. +// If not specified, defaults to 3 +func AssertSummarize(value int64) AssertAttr { return func(m optionalAttr) { - m["compute_uv"] = value + m["summarize"] = value } } -// SvdFullMatrices sets the optional full_matrices attribute to value. +// Asserts that the given condition is true. // -// value: If true, compute full-sized `u` and `v`. If false -// (the default), compute only the leading `P` singular vectors. -// Ignored if `compute_uv` is `False`. +// If `condition` evaluates to false, print the list of tensors in `data`. +// `summarize` determines how many entries of the tensors to print. +// +// Arguments: +// condition: The condition to evaluate. +// data: The tensors to print out when condition is false. +// +// Returns the created operation. 
+func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Assert", + Input: []tf.Input{ + condition, tf.OutputList(data), + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// RandomUniformAttr is an optional argument to RandomUniform. +type RandomUniformAttr func(optionalAttr) + +// RandomUniformSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomUniformSeed(value int64) RandomUniformAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomUniformSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomUniformSeed2(value int64) RandomUniformAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from a uniform distribution. +// +// The generated values follow a uniform distribution in the range `[0, 1)`. The +// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// +// Arguments: +// shape: The shape of the output tensor. +// dtype: The type of the output. +// +// Returns A tensor of the specified shape filled with uniform random values. 
+func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomUniform", + Input: []tf.Input{ + shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl. +type ResourceApplyFtrlAttr func(optionalAttr) + +// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. // If not specified, defaults to false -func SvdFullMatrices(value bool) SvdAttr { +func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the Ftrl-proximal scheme. +// +// accum_new = accum + grad * grad +// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regulariation. Must be a scalar. +// l2: L2 regulariation. Must be a scalar. +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. 
+func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyFtrl", + Input: []tf.Input{ + var_, accum, linear, grad, lr, l1, l2, lr_power, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// AnyAttr is an optional argument to Any. +type AnyAttr func(optionalAttr) + +// AnyKeepDims sets the optional keep_dims attribute to value. +// +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func AnyKeepDims(value bool) AnyAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the "logical or" of elements across dimensions of a tensor. +// +// Reduces `input` along the dimensions given in `reduction_indices`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `reduction_indices`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. +// +// Arguments: +// input: The tensor to reduce. +// reduction_indices: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. +// +// Returns The reduced tensor. +func Any(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...AnyAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Any", + Input: []tf.Input{ + input, reduction_indices, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). 
+// +// The Hurwitz zeta function is defined as: +// +// +// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) +func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Zeta", + Input: []tf.Input{ + x, q, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Inverse real-valued fast Fourier transform. +// +// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most dimension of `input`. +// +// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the +// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If +// `fft_length` is not provided, it is computed from the size of the inner-most +// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to +// compute `input` is odd, it should be provided since it cannot be inferred +// properly. +// +// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller +// than the corresponding dimension of `input`, the dimension is cropped. If it is +// larger, the dimension is padded with zeros. +// +// Arguments: +// input: A complex64 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. +// +// Returns A float32 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length` samples of its inverse +// 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.irfft +// @end_compatibility +func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IRFFT", + Input: []tf.Input{ + input, fft_length, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Adds a value to the current value of a variable. 
+// +// Any ReadVariableOp which depends directly or indirectly on this assign is +// guaranteed to see the incremented value or a subsequent newer one. +// +// Outputs the incremented value, which can be used to totally order the +// increments to this variable. +// +// Arguments: +// resource: handle to the resource in which to store the variable. +// value: the value by which the variable will be incremented. +// +// Returns the created operation. +func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "AssignAddVariableOp", + Input: []tf.Input{ + resource, value, + }, + } + return scope.AddOperation(opspec) +} + +// Computes inverse hyperbolic sine of x element-wise. +func Asinh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Asinh", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Real-valued fast Fourier transform. +// +// Computes the 1-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most dimension of `input`. +// +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the +// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, +// followed by the `fft_length / 2` positive-frequency terms. +// +// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. +// +// Arguments: +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. +// +// Returns A complex64 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length / 2 + 1` unique +// frequency components of its 1D Fourier transform. 
+// +// @compatibility(numpy) +// Equivalent to np.fft.rfft +// @end_compatibility +func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "RFFT", + Input: []tf.Input{ + input, fft_length, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// OrderedMapStageAttr is an optional argument to OrderedMapStage. +type OrderedMapStageAttr func(optionalAttr) + +// OrderedMapStageCapacity sets the optional capacity attribute to value. +// +// value: Maximum number of elements in the Staging Area. If > 0, inserts +// on the container will block when the capacity is reached. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapStageCapacity(value int64) OrderedMapStageAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapStageContainer sets the optional container attribute to value. +// +// value: If non-empty, this queue is placed in the given container. Otherwise, +// a default container is used. +// If not specified, defaults to "" +func OrderedMapStageContainer(value string) OrderedMapStageAttr { return func(m optionalAttr) { - m["full_matrices"] = value + m["container"] = value } } -// Computes the singular value decompositions of one or more matrices. +// OrderedMapStageSharedName sets the optional shared_name attribute to value. // -// Computes the SVD of each inner matrix in `input` such that -// `input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])` +// value: It is necessary to match this name to the matching Unstage Op. 
+// If not specified, defaults to "" +func OrderedMapStageSharedName(value string) OrderedMapStageAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Stage (key, values) in the underlying container which behaves like a ordered // -// ```python -// # a is a tensor containing a batch of matrices. -// # s is a tensor of singular values for each matrix. -// # u is the tensor containing of left singular vectors for each matrix. -// # v is the tensor containing of right singular vectors for each matrix. -// s, u, v = svd(a) -// s, _, _ = svd(a, compute_uv=False) -// ``` +// associative container. Elements are ordered by key. // // Arguments: -// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. +// key: int64 // -// Returns Singular values. Shape is `[..., P]`.Left singular vectors. If `full_matrices` is `False` then shape is -// `[..., M, P]`; if `full_matrices` is `True` then shape is -// `[..., M, M]`. Undefined if `compute_uv` is `False`.Left singular vectors. If `full_matrices` is `False` then shape is -// `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`. -// Undefined if `compute_uv` is false. -func Svd(scope *Scope, input tf.Output, optional ...SvdAttr) (s tf.Output, u tf.Output, v tf.Output) { +// values: a list of tensors +// dtypes A list of data types that inserted values should adhere to. +// +// +// Returns the created operation. 
+func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Svd", + Type: "OrderedMapStage", Input: []tf.Input{ - input, + key, indices, tf.OutputList(values), }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// Computes element-wise population count (a.k.a. popcount, bitsum, bitcount). -// -// For each entry in `x`, calculates the number of `1` (on) bits in the binary -// representation of that entry. +// Computes the gradient for the tanh of `x` wrt its input. // -// **NOTE**: It is more efficient to first `tf.bitcast` your tensors into -// `int32` or `int64` and perform the bitcount on the result, than to feed in -// 8- or 16-bit inputs and then aggregate the resulting counts. -func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) { +// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy` +// is the corresponding input gradient. +func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "PopulationCount", + Type: "TanhGrad", Input: []tf.Input{ - x, + y, dy, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// AssertAttr is an optional argument to Assert. -type AssertAttr func(optionalAttr) - -// AssertSummarize sets the optional summarize attribute to value. +// Outputs all keys and values in the table. // -// value: Print this many entries of each tensor. -// If not specified, defaults to 3 -func AssertSummarize(value int64) AssertAttr { - return func(m optionalAttr) { - m["summarize"] = value - } -} - -// Asserts that the given condition is true. 
+// Arguments: +// table_handle: Handle to the table. // -// If `condition` evaluates to false, print the list of tensors in `data`. -// `summarize` determines how many entries of the tensors to print. // -// Arguments: -// condition: The condition to evaluate. -// data: The tensors to print out when condition is false. // -// Returns the created operation. -func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { +// Returns Vector of all keys present in the table.Tensor of all values in the table. Indexed in parallel with `keys`. +func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues} opspec := tf.OpSpec{ - Type: "Assert", + Type: "LookupTableExportV2", Input: []tf.Input{ - condition, tf.OutputList(data), + table_handle, }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// RandomUniformAttr is an optional argument to RandomUniform. -type RandomUniformAttr func(optionalAttr) - -// RandomUniformSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomUniformSeed(value int64) RandomUniformAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomUniformSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. 
-// If not specified, defaults to 0 -func RandomUniformSeed2(value int64) RandomUniformAttr { - return func(m optionalAttr) { - m["seed2"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// Outputs random values from a uniform distribution. +// Converts each string in the input Tensor to its hash mod by a number of buckets. // -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// The hash function is deterministic on the content of the string within the +// process and will never change. However, it is not suitable for cryptography. +// This function may be used when CPU time is scarce and inputs are trusted or +// unimportant. There is a risk of adversaries constructing inputs that all hash +// to the same bucket. To prevent this problem, use a strong hash function with +// `tf.string_to_hash_bucket_strong`. // // Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. +// input: The strings to assign a hash bucket. +// num_buckets: The number of buckets. // -// Returns A tensor of the specified shape filled with uniform random values. -func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"num_buckets": num_buckets} opspec := tf.OpSpec{ - Type: "RandomUniform", + Type: "StringToHashBucketFast", Input: []tf.Input{ - shape, + input, }, Attrs: attrs, } @@ -19377,156 +19580,149 @@ func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional .. 
return op.Output(0) } -// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl. -type ResourceApplyFtrlAttr func(optionalAttr) +// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. +type TensorArrayGatherV3Attr func(optionalAttr) -// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value. +// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr { +// value: The expected shape of an element, if known. Used to +// validate the shapes of TensorArray elements. If this shape is not +// fully specified, gathering zero-size TensorArrays is an error. +// If not specified, defaults to +func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { return func(m optionalAttr) { - m["use_locking"] = value + m["element_shape"] = value } } -// Update '*var' according to the Ftrl-proximal scheme. +// Gather specific elements from the TensorArray into output `value`. // -// accum_new = accum + grad * grad -// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// All elements selected by `indices` must have the same shape. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regulariation. Must be a scalar. -// l2: L2 regulariation. Must be a scalar. -// lr_power: Scaling factor. Must be a scalar. +// handle: The handle to a TensorArray. 
+// indices: The locations in the TensorArray from which to read tensor elements. +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. // -// Returns the created operation. -func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) { +// Returns All of the elements in the TensorArray, concatenated along a new +// axis (the new dimension 0). +func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyFtrl", + Type: "TensorArrayGatherV3", Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, lr_power, + handle, indices, flow_in, }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// AnyAttr is an optional argument to Any. -type AnyAttr func(optionalAttr) - -// AnyKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func AnyKeepDims(value bool) AnyAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the "logical or" of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `reduction_indices`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_indices`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// reduction_indices: The dimensions to reduce. 
Must be in the range -// `[-rank(input), rank(input))`. +// Deprecated. Disallowed in GraphDef version >= 2. // -// Returns The reduced tensor. -func Any(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...AnyAttr) (output tf.Output) { +// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead +func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Any", + Type: "AdjustContrast", Input: []tf.Input{ - input, reduction_indices, + images, contrast_factor, min_value, max_value, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad. +type MaxPoolGradGradAttr func(optionalAttr) + +// MaxPoolGradGradDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } } -// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). -// -// The Hurwitz zeta function is defined as: +// Computes second-order gradients of the maxpooling function. // +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. 
+// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) -func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { +// Returns Gradients of gradients w.r.t. the input to `max_pool`. +func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Zeta", + Type: "MaxPoolGradGrad", Input: []tf.Input{ - x, q, + orig_input, orig_output, grad, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Inverse real-valued fast Fourier transform. +// 3D real-valued fast Fourier transform. // -// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most dimension of `input`. +// Computes the 3-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most 3 dimensions of `input`. // -// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the -// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If -// `fft_length` is not provided, it is computed from the size of the inner-most -// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to -// compute `input` is odd, it should be provided since it cannot be inferred -// properly. +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the +// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension +// of `output`: the zero-frequency term, followed by the `fft_length / 2` +// positive-frequency terms. 
// -// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller -// than the corresponding dimension of `input`, the dimension is cropped. If it is -// larger, the dimension is padded with zeros. +// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. // // Arguments: -// input: A complex64 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. // -// Returns A float32 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length` samples of its inverse -// 1D Fourier transform. +// Returns A complex64 tensor of the same rank as `input`. The inner-most 3 +// dimensions of `input` are replaced with the their 3D Fourier transform. The +// inner-most dimension contains `fft_length / 2 + 1` unique frequency +// components. // // @compatibility(numpy) -// Equivalent to np.fft.irfft +// Equivalent to np.fft.rfftn with 3 dimensions. // @end_compatibility -func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IRFFT", + Type: "RFFT3D", Input: []tf.Input{ input, fft_length, }, @@ -19535,111 +19731,85 @@ func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Outpu return op.Output(0) } -// Creates a dataset with a range of values. Corresponds to python's xrange. -// -// Arguments: -// start: corresponds to start in python's xrange(). -// stop: corresponds to stop in python's xrange(). -// step: corresponds to step in python's xrange(). -// +// RestoreSliceAttr is an optional argument to RestoreSlice. 
+type RestoreSliceAttr func(optionalAttr) + +// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. // -func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "RangeDataset", - Input: []tf.Input{ - start, stop, step, - }, - Attrs: attrs, +// value: Index of file to open first if multiple files match +// `file_pattern`. See the documentation for `Restore`. +// If not specified, defaults to -1 +func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { + return func(m optionalAttr) { + m["preferred_shard"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Saves tensors in V2 checkpoint format. +// Restores a tensor from checkpoint files. // -// By default, saves the named tensors in full. If the caller wishes to save -// specific slices of full tensors, "shape_and_slices" should be non-empty strings -// and correspondingly well-formed. +// This is like `Restore` except that restored tensor can be listed as filling +// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the +// larger tensor and the slice that the restored tensor covers. +// +// The `shape_and_slice` input has the same format as the +// elements of the `shapes_and_slices` input of the `SaveSlices` op. // // Arguments: -// prefix: Must have a single element. The prefix of the V2 checkpoint to which we -// write the tensors. -// tensor_names: shape {N}. The names of the tensors to be saved. -// shape_and_slices: shape {N}. The slice specs of the tensors to be saved. -// Empty strings indicate that they are non-partitioned tensors. -// tensors: `N` tensors to save. +// file_pattern: Must have a single element. 
The pattern of the files from +// which we read the tensor. +// tensor_name: Must have a single element. The name of the tensor to be +// restored. +// shape_and_slice: Scalar. The shapes and slice specifications to use when +// restoring a tensors. +// dt: The type of the tensor to be restored. // -// Returns the created operation. -func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) { +// Returns The restored tensor. +func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dt": dt} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SaveV2", + Type: "RestoreSlice", Input: []tf.Input{ - prefix, tensor_names, shape_and_slices, tf.OutputList(tensors), + file_pattern, tensor_name, shape_and_slice, }, + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve. -type MatrixTriangularSolveAttr func(optionalAttr) - -// MatrixTriangularSolveLower sets the optional lower attribute to value. -// -// value: Boolean indicating whether the innermost matrices in `matrix` are -// lower or upper triangular. -// If not specified, defaults to true -func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr { - return func(m optionalAttr) { - m["lower"] = value - } -} +// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. +type StatelessTruncatedNormalAttr func(optionalAttr) -// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value. -// -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. +// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. 
// -// @compatibility(numpy) -// Equivalent to np.linalg.triangular_solve -// @end_compatibility -// If not specified, defaults to false -func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr { +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { return func(m optionalAttr) { - m["adjoint"] = value + m["dtype"] = value } } -// Solves systems of linear equations with upper or lower triangular matrices by -// -// backsubstitution. +// Outputs deterministic pseudorandom values from a truncated normal distribution. // -// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form -// square matrices. If `lower` is `True` then the strictly upper triangular part -// of each inner-most matrix is assumed to be zero and not accessed. -// If `lower` is False then the strictly lower triangular part of each inner-most -// matrix is assumed to be zero and not accessed. -// `rhs` is a tensor of shape `[..., M, K]`. +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. // -// The output is a tensor of shape `[..., M, K]`. If `adjoint` is -// `True` then the innermost matrices in `output` satisfy matrix equations -// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. -// If `adjoint` is `False` then the strictly then the innermost matrices in -// `output` satisfy matrix equations -// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`. +// The outputs are a deterministic function of `shape` and `seed`. // // Arguments: -// matrix: Shape is `[..., M, M]`. -// rhs: Shape is `[..., M, K]`. +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). // -// Returns Shape is `[..., M, K]`. 
-func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) { +// Returns Random values with specified shape. +func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -19648,9 +19818,9 @@ func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, option a(attrs) } opspec := tf.OpSpec{ - Type: "MatrixTriangularSolve", + Type: "StatelessTruncatedNormal", Input: []tf.Input{ - matrix, rhs, + shape, seed, }, Attrs: attrs, } @@ -19658,338 +19828,282 @@ func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, option return op.Output(0) } -// Adds a value to the current value of a variable. +// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. +type UniqueWithCountsAttr func(optionalAttr) + +// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. +// If not specified, defaults to DT_INT32 +func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { + return func(m optionalAttr) { + m["out_idx"] = value + } +} + +// Finds unique elements in a 1-D tensor. // -// Any ReadVariableOp which depends directly or indirectly on this assign is -// guaranteed to see the incremented value or a subsequent newer one. +// This operation returns a tensor `y` containing all of the unique elements of `x` +// sorted in the same order that they occur in `x`. This operation also returns a +// tensor `idx` the same size as `x` that contains the index of each value of `x` +// in the unique output `y`. Finally, it returns a third tensor `count` that +// contains the count of each element of `y` in `x`. In other words: // -// Outputs the incremented value, which can be used to totally order the -// increments to this variable. 
+// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` // -// Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value by which the variable will be incremented. +// For example: // -// Returns the created operation. -func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AssignAddVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} - -// Computes inverse hyperbolic sine of x element-wise. -func Asinh(scope *Scope, x tf.Output) (y tf.Output) { +// ``` +// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +// y, idx, count = unique_with_counts(x) +// y ==> [1, 2, 4, 7, 8] +// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// count ==> [2, 1, 3, 1, 2] +// ``` +// +// Arguments: +// x: 1-D. +// +// Returns 1-D.1-D.1-D. +func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Asinh", + Type: "UniqueWithCounts", Input: []tf.Input{ x, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Real-valued fast Fourier transform. -// -// Computes the 1-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most dimension of `input`. -// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the -// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, -// followed by the `fft_length / 2` positive-frequency terms. -// -// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. 
+// Creates a dataset that skips `count` elements from the `input_dataset`. // // Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. // -// Returns A complex64 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length / 2 + 1` unique -// frequency components of its 1D Fourier transform. +// count: A scalar representing the number of elements from the `input_dataset` +// that should be skipped. If count is -1, skips everything. // -// @compatibility(numpy) -// Equivalent to np.fft.rfft -// @end_compatibility -func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// +func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "RFFT", + Type: "SkipDataset", Input: []tf.Input{ - input, fft_length, + input_dataset, count, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// OrderedMapStageAttr is an optional argument to OrderedMapStage. -type OrderedMapStageAttr func(optionalAttr) - -// OrderedMapStageCapacity sets the optional capacity attribute to value. -// -// value: Maximum number of elements in the Staging Area. If > 0, inserts -// on the container will block when the capacity is reached. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapStageCapacity(value int64) OrderedMapStageAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value. 
-// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapStageContainer sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. Otherwise, -// a default container is used. -// If not specified, defaults to "" -func OrderedMapStageContainer(value string) OrderedMapStageAttr { - return func(m optionalAttr) { - m["container"] = value - } -} +// ComplexAttr is an optional argument to Complex. +type ComplexAttr func(optionalAttr) -// OrderedMapStageSharedName sets the optional shared_name attribute to value. -// -// value: It is necessary to match this name to the matching Unstage Op. -// If not specified, defaults to "" -func OrderedMapStageSharedName(value string) OrderedMapStageAttr { +// ComplexTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_COMPLEX64 +func ComplexTout(value tf.DataType) ComplexAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["Tout"] = value } } -// Stage (key, values) in the underlying container which behaves like a ordered -// -// associative container. Elements are ordered by key. +// Converts two real numbers to a complex number. // -// Arguments: -// key: int64 +// Given a tensor `real` representing the real part of a complex number, and a +// tensor `imag` representing the imaginary part of a complex number, this +// operation returns complex numbers elementwise of the form \\(a + bj\\), where +// *a* represents the `real` part and *b* represents the `imag` part. // -// values: a list of tensors -// dtypes A list of data types that inserted values should adhere to. +// The input tensors `real` and `imag` must have the same shape. // +// For example: // -// Returns the created operation. 
-func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) { +// ``` +// # tensor 'real' is [2.25, 3.25] +// # tensor `imag` is [4.75, 5.75] +// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] +// ``` +func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "OrderedMapStage", + Type: "Complex", Input: []tf.Input{ - key, indices, tf.OutputList(values), + real, imag, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the gradient for the tanh of `x` wrt its input. +// ImagAttr is an optional argument to Imag. +type ImagAttr func(optionalAttr) + +// ImagTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func ImagTout(value tf.DataType) ImagAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Returns the imaginary part of a complex number. // -// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy` -// is the corresponding input gradient. -func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the imaginary part of each element in `input`. All +// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* +// is the real part and *b* is the imaginary part returned by this operation. 
+// +// For example: +// +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.imag(input) ==> [4.75, 5.75] +// ``` +func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TanhGrad", + Type: "Imag", Input: []tf.Input{ - y, dy, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Outputs all keys and values in the table. +// Creates a dataset that emits the lines of one or more text files. // // Arguments: -// table_handle: Handle to the table. -// -// -// -// Returns Vector of all keys present in the table.Tensor of all values in the table. Indexed in parallel with `keys`. -func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) { +// filenames: A scalar or a vector containing the name(s) of the file(s) to be +// read. +// compression_type: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// buffer_size: A scalar containing the number of bytes to buffer. +func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues} opspec := tf.OpSpec{ - Type: "LookupTableExportV2", + Type: "TextLineDataset", Input: []tf.Input{ - table_handle, + filenames, compression_type, buffer_size, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Converts each string in the input Tensor to its hash mod by a number of buckets. +// Returns the number of records this Reader has produced. // -// The hash function is deterministic on the content of the string within the -// process and will never change. 
However, it is not suitable for cryptography. -// This function may be used when CPU time is scarce and inputs are trusted or -// unimportant. There is a risk of adversaries constructing inputs that all hash -// to the same bucket. To prevent this problem, use a strong hash function with -// `tf.string_to_hash_bucket_strong`. +// This is the same as the number of ReaderRead executions that have +// succeeded. // // Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. -// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { +// reader_handle: Handle to a Reader. +func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_buckets": num_buckets} opspec := tf.OpSpec{ - Type: "StringToHashBucketFast", + Type: "ReaderNumRecordsProducedV2", Input: []tf.Input{ - input, + reader_handle, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. -type TensorArrayGatherV3Attr func(optionalAttr) - -// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. -// -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { - return func(m optionalAttr) { - m["element_shape"] = value - } -} - -// Gather specific elements from the TensorArray into output `value`. -// -// All elements selected by `indices` must have the same shape. -// -// Arguments: -// handle: The handle to a TensorArray. 
-// indices: The locations in the TensorArray from which to read tensor elements. -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. +// Computes exponential of x - 1 element-wise. // -// Returns All of the elements in the TensorArray, concatenated along a new -// axis (the new dimension 0). -func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { +// I.e., \\(y = (\exp x) - 1\\). +func Expm1(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "TensorArrayGatherV3", + Type: "Expm1", Input: []tf.Input{ - handle, indices, flow_in, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Deprecated. Disallowed in GraphDef version >= 2. +// Returns x - y element-wise. // -// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead -func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) { +// *NOTE*: `Sub` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "AdjustContrast", + Type: "Sub", Input: []tf.Input{ - images, contrast_factor, min_value, max_value, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad. -type MaxPoolGradGradAttr func(optionalAttr) +// StringToNumberAttr is an optional argument to StringToNumber. +type StringToNumberAttr func(optionalAttr) -// MaxPoolGradGradDataFormat sets the optional data_format attribute to value. 
+// StringToNumberOutType sets the optional out_type attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr { +// value: The numeric type to interpret each string in `string_tensor` as. +// If not specified, defaults to DT_FLOAT +func StringToNumberOutType(value tf.DataType) StringToNumberAttr { return func(m optionalAttr) { - m["data_format"] = value + m["out_type"] = value } } -// Computes second-order gradients of the maxpooling function. +// Converts each string in the input Tensor to the specified numeric type. // -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// (Note that int32 overflow results in an error while float overflow +// results in a rounded value.) // -// Returns Gradients of gradients w.r.t. the input to `max_pool`. -func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) { +// Returns A Tensor of the same shape as the input `string_tensor`. 
+func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolGradGrad", + Type: "StringToNumber", Input: []tf.Input{ - orig_input, orig_output, grad, + string_tensor, }, Attrs: attrs, } @@ -19997,91 +20111,84 @@ func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, return op.Output(0) } -// 3D real-valued fast Fourier transform. -// -// Computes the 3-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most 3 dimensions of `input`. -// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the -// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension -// of `output`: the zero-frequency term, followed by the `fft_length / 2` -// positive-frequency terms. -// -// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. -// -// Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. -// -// Returns A complex64 tensor of the same rank as `input`. The inner-most 3 -// dimensions of `input` are replaced with the their 3D Fourier transform. The -// inner-most dimension contains `fft_length / 2 + 1` unique frequency -// components. -// -// @compatibility(numpy) -// Equivalent to np.fft.rfftn with 3 dimensions. -// @end_compatibility -func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// Returns the truth value of NOT x element-wise. 
+func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "RFFT3D", + Type: "LogicalNot", Input: []tf.Input{ - input, fft_length, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// RestoreSliceAttr is an optional argument to RestoreSlice. -type RestoreSliceAttr func(optionalAttr) +// LRNGradAttr is an optional argument to LRNGrad. +type LRNGradAttr func(optionalAttr) -// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. +// LRNGradDepthRadius sets the optional depth_radius attribute to value. // -// value: Index of file to open first if multiple files match -// `file_pattern`. See the documentation for `Restore`. -// If not specified, defaults to -1 -func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { +// value: A depth radius. +// If not specified, defaults to 5 +func LRNGradDepthRadius(value int64) LRNGradAttr { return func(m optionalAttr) { - m["preferred_shard"] = value + m["depth_radius"] = value } } -// Restores a tensor from checkpoint files. +// LRNGradBias sets the optional bias attribute to value. // -// This is like `Restore` except that restored tensor can be listed as filling -// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the -// larger tensor and the slice that the restored tensor covers. +// value: An offset (usually > 0 to avoid dividing by 0). +// If not specified, defaults to 1 +func LRNGradBias(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["bias"] = value + } +} + +// LRNGradAlpha sets the optional alpha attribute to value. // -// The `shape_and_slice` input has the same format as the -// elements of the `shapes_and_slices` input of the `SaveSlices` op. +// value: A scale factor, usually positive. 
+// If not specified, defaults to 1 +func LRNGradAlpha(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["alpha"] = value + } +} + +// LRNGradBeta sets the optional beta attribute to value. +// +// value: An exponent. +// If not specified, defaults to 0.5 +func LRNGradBeta(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["beta"] = value + } +} + +// Gradients for Local Response Normalization. // // Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// shape_and_slice: Scalar. The shapes and slice specifications to use when -// restoring a tensors. -// dt: The type of the tensor to be restored. +// input_grads: 4-D with shape `[batch, height, width, channels]`. +// input_image: 4-D with shape `[batch, height, width, channels]`. +// output_image: 4-D with shape `[batch, height, width, channels]`. // -// Returns The restored tensor. -func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { +// Returns The gradients for LRN. +func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dt": dt} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "RestoreSlice", + Type: "LRNGrad", Input: []tf.Input{ - file_pattern, tensor_name, shape_and_slice, + input_grads, input_image, output_image, }, Attrs: attrs, } @@ -20089,33 +20196,38 @@ func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, s return op.Output(0) } -// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. 
-type StatelessTruncatedNormalAttr func(optionalAttr) +// EncodePngAttr is an optional argument to EncodePng. +type EncodePngAttr func(optionalAttr) -// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. +// EncodePngCompression sets the optional compression attribute to value. // -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { +// value: Compression level. +// If not specified, defaults to -1 +func EncodePngCompression(value int64) EncodePngAttr { return func(m optionalAttr) { - m["dtype"] = value + m["compression"] = value } } -// Outputs deterministic pseudorandom values from a truncated normal distribution. +// PNG-encode an image. // -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. +// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` +// where `channels` is: // -// The outputs are a deterministic function of `shape` and `seed`. +// * 1: for grayscale. +// * 2: for grayscale + alpha. +// * 3: for RGB. +// * 4: for RGBA. +// +// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder +// default or a value from 0 to 9. 9 is the highest compression level, generating +// the smallest output, but is slower. // // Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// image: 3-D with shape `[height, width, channels]`. // -// Returns Random values with specified shape. -func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { +// Returns 0-D. PNG-encoded image. 
+func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) { if scope.Err() != nil { return } @@ -20124,9 +20236,9 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt a(attrs) } opspec := tf.OpSpec{ - Type: "StatelessTruncatedNormal", + Type: "EncodePng", Input: []tf.Input{ - shape, seed, + image, }, Attrs: attrs, } @@ -20134,166 +20246,170 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt return op.Output(0) } -// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. -type UniqueWithCountsAttr func(optionalAttr) +// MaxPoolAttr is an optional argument to MaxPool. +type MaxPoolAttr func(optionalAttr) -// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { +// MaxPoolDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolDataFormat(value string) MaxPoolAttr { return func(m optionalAttr) { - m["out_idx"] = value + m["data_format"] = value } } -// Finds unique elements in a 1-D tensor. -// -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`. This operation also returns a -// tensor `idx` the same size as `x` that contains the index of each value of `x` -// in the unique output `y`. Finally, it returns a third tensor `count` that -// contains the count of each element of `y` in `x`. 
In other words: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// For example: -// -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx, count = unique_with_counts(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// count ==> [2, 1, 3, 1, 2] -// ``` +// Performs max pooling on the input. // // Arguments: -// x: 1-D. +// input: 4-D input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// Returns 1-D.1-D.1-D. -func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { +// Returns The max pooled output tensor. +func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "UniqueWithCounts", + Type: "MaxPool", Input: []tf.Input{ - x, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Creates a dataset that skips `count` elements from the `input_dataset`. +// Fast Fourier transform. // -// Arguments: +// Computes the 1-dimensional discrete Fourier transform over the inner-most +// dimension of `input`. // -// count: A scalar representing the number of elements from the `input_dataset` -// that should be skipped. If count is -1, skips everything. +// Arguments: +// input: A complex64 tensor. // +// Returns A complex64 tensor of the same shape as `input`. The inner-most +// dimension of `input` is replaced with its 1D Fourier transform. 
// -func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// @compatibility(numpy) +// Equivalent to np.fft.fft +// @end_compatibility +func FFT(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "SkipDataset", + Type: "FFT", Input: []tf.Input{ - input_dataset, count, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ComplexAttr is an optional argument to Complex. -type ComplexAttr func(optionalAttr) +// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. +type MaxPoolWithArgmaxAttr func(optionalAttr) -// ComplexTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_COMPLEX64 -func ComplexTout(value tf.DataType) ComplexAttr { +// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. +// If not specified, defaults to DT_INT64 +func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { return func(m optionalAttr) { - m["Tout"] = value + m["Targmax"] = value } } -// Converts two real numbers to a complex number. +// Performs max pooling on the input and outputs both max values and indices. // -// Given a tensor `real` representing the real part of a complex number, and a -// tensor `imag` representing the imaginary part of a complex number, this -// operation returns complex numbers elementwise of the form \\(a + bj\\), where -// *a* represents the `real` part and *b* represents the `imag` part. +// The indices in `argmax` are flattened, so that a maximum value at position +// `[b, y, x, c]` becomes flattened index +// `((b * height + y) * width + x) * channels + c`. // -// The input tensors `real` and `imag` must have the same shape. 
+// The indices returned are always in `[0, height) x [0, width)` before flattening, +// even if padding is involved and the mathematically correct answer is outside +// (either negative or too large). This is a bug, but fixing it is difficult to do +// in a safe backwards compatible way, especially due to flattening. // -// For example: +// Arguments: +// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// ``` -// # tensor 'real' is [2.25, 3.25] -// # tensor `imag` is [4.75, 5.75] -// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] -// ``` -func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { +// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. +func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Complex", + Type: "MaxPoolWithArgmax", Input: []tf.Input{ - real, imag, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// ImagAttr is an optional argument to Imag. -type ImagAttr func(optionalAttr) +// MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2. +type MaxPoolGradGradV2Attr func(optionalAttr) -// ImagTout sets the optional Tout attribute to value. 
-// If not specified, defaults to DT_FLOAT -func ImagTout(value tf.DataType) ImagAttr { +// MaxPoolGradGradV2DataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolGradGradV2DataFormat(value string) MaxPoolGradGradV2Attr { return func(m optionalAttr) { - m["Tout"] = value + m["data_format"] = value } } -// Returns the imaginary part of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the imaginary part of each element in `input`. All -// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part returned by this operation. +// Computes second-order gradients of the maxpooling function. // -// For example: +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.imag(input) ==> [4.75, 5.75] -// ``` -func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { +// Returns Gradients of gradients w.r.t. the input to `max_pool`. 
+func MaxPoolGradGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradGradV2Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Imag", + Type: "MaxPoolGradGradV2", Input: []tf.Input{ - input, + orig_input, orig_output, grad, ksize, strides, }, Attrs: attrs, } @@ -20301,79 +20417,108 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output return op.Output(0) } -// Creates a dataset that emits the lines of one or more text files. +// Adjust the saturation of one or more images. +// +// `images` is a tensor of at least 3 dimensions. The last dimension is +// interpretted as channels, and must be three. +// +// The input image is considered in the RGB colorspace. Conceptually, the RGB +// colors are first mapped into HSV. A scale is then applied all the saturation +// values, and then remapped back to RGB colorspace. // // Arguments: -// filenames: A scalar or a vector containing the name(s) of the file(s) to be -// read. -// compression_type: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// buffer_size: A scalar containing the number of bytes to buffer. -func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { +// images: Images to adjust. At least 3-D. +// scale: A float scale to add to the saturation. +// +// Returns The hue-adjusted image or images. 
+func AdjustSaturation(scope *Scope, images tf.Output, scale tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TextLineDataset", + Type: "AdjustSaturation", Input: []tf.Input{ - filenames, compression_type, buffer_size, + images, scale, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the number of records this Reader has produced. -// -// This is the same as the number of ReaderRead executions that have -// succeeded. +// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2. +type SelfAdjointEigV2Attr func(optionalAttr) + +// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value. // -// Arguments: -// reader_handle: Handle to a Reader. -func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumRecordsProducedV2", - Input: []tf.Input{ - reader_handle, - }, +// value: If `True` then eigenvectors will be computed and returned in `v`. +// Otherwise, only the eigenvalues will be computed. +// If not specified, defaults to true +func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr { + return func(m optionalAttr) { + m["compute_v"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes exponential of x - 1 element-wise. +// Computes the eigen decomposition of one or more square self-adjoint matrices. // -// I.e., \\(y = (\exp x) - 1\\). -func Expm1(scope *Scope, x tf.Output) (y tf.Output) { +// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in +// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. +// +// ```python +// # a is a tensor. +// # e is a tensor of eigenvalues. +// # v is a tensor of eigenvectors. +// e, v = self_adjoint_eig(a) +// e = self_adjoint_eig(a, compute_v=False) +// ``` +// +// Arguments: +// input: `Tensor` input of shape `[N, N]`. 
+// +// Returns Eigenvalues. Shape is `[N]`.Eigenvectors. Shape is `[N, N]`. +func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Expm1", + Type: "SelfAdjointEigV2", Input: []tf.Input{ - x, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Returns x - y element-wise. +// Computes second-order gradients of the maxpooling function. // -// *NOTE*: `Sub` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Arguments: +// input: The original input. +// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the +// input of `max_pool`. +// argmax: The indices of the maximum values chosen for each output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns Gradients of gradients w.r.t. the input of `max_pool`. 
+func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "Sub", + Type: "MaxPoolGradGradWithArgmax", Input: []tf.Input{ - x, y, + input, grad, argmax, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -21962,56 +22107,7 @@ func QuantizedBiasAdd(scope *Scope, input tf.Output, bias tf.Output, min_input t opspec := tf.OpSpec{ Type: "QuantizedBiasAdd", Input: []tf.Input{ - input, bias, min_input, max_input, min_bias, max_bias, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QuantizedConv2DAttr is an optional argument to QuantizedConv2D. -type QuantizedConv2DAttr func(optionalAttr) - -// QuantizedConv2DOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Computes a 2D convolution given quantized 4D input and filter tensors. -// -// The inputs are quantized tensors where the lowest value represents the real -// number of the associated minimum, and the highest represents the maximum. -// This means that you can only interpret the quantized output in the same way, by -// taking the returned minimum and maximum values into account. -// -// Arguments: -// -// filter: filter's input_depth dimension must match input's depth dimensions. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// min_filter: The float value that the lowest quantized filter value represents. 
-// max_filter: The float value that the highest quantized filter value represents. -// strides: The stride of the sliding window for each dimension of the input -// tensor. -// padding: The type of padding algorithm to use. -// -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedConv2D", - Input: []tf.Input{ - input, filter, min_input, max_input, min_filter, max_filter, + input, bias, min_input, max_input, min_bias, max_bias, }, Attrs: attrs, } @@ -22201,6 +22297,25 @@ func ComplexAbs(scope *Scope, x tf.Output, optional ...ComplexAbsAttr) (y tf.Out return op.Output(0) } +// Computes the reciprocal of x element-wise. +// +// DEPRECATED at GraphDef version 17: Use Reciprocal +// +// I.e., \\(y = 1 / x\\). +func Inv(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Inv", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // OrderedMapClearAttr is an optional argument to OrderedMapClear. type OrderedMapClearAttr func(optionalAttr) @@ -25687,57 +25802,6 @@ func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x return op.Output(0), op.Output(1), op.Output(2) } -// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2. -type QueueEnqueueManyV2Attr func(optionalAttr) - -// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value. 
-// -// value: If the queue is too full, this operation will block for up -// to timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr { - return func(m optionalAttr) { - m["timeout_ms"] = value - } -} - -// Enqueues zero or more tuples of one or more tensors in the given queue. -// -// This operation slices each component tensor along the 0th dimension to -// make multiple queue elements. All of the tuple components must have the -// same size in the 0th dimension. -// -// The components input has k elements, which correspond to the components of -// tuples stored in the given queue. -// -// N.B. If the queue is full, this operation will block until the given -// elements have been enqueued (or 'timeout_ms' elapses, if specified). -// -// Arguments: -// handle: The handle to a queue. -// components: One or more tensors from which the enqueued tensors should -// be taken. -// -// Returns the created operation. -func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QueueEnqueueManyV2", - Input: []tf.Input{ - handle, tf.OutputList(components), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - // Forwards the input to the output. // // This operator represents the loop termination condition used by the @@ -25872,105 +25936,6 @@ func CompareAndBitpack(scope *Scope, input tf.Output, threshold tf.Output) (outp return op.Output(0) } -// Creates a dataset that asynchronously prefetches elements from `input_dataset`. -// -// Arguments: -// -// buffer_size: The maximum number of elements to buffer in an iterator over -// this dataset. 
-// -// -func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "PrefetchDataset", - Input: []tf.Input{ - input_dataset, buffer_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs a `Summary` protocol buffer with a tensor and per-plugin data. -// -// Arguments: -// tag: A string attached to this summary. Used for organization in TensorBoard. -// tensor: A tensor to serialize. -// serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin -// data. -func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorSummaryV2", - Input: []tf.Input{ - tag, tensor, serialized_summary_metadata, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. -type AudioSummaryV2Attr func(optionalAttr) - -// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. -// -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { - return func(m optionalAttr) { - m["max_outputs"] = value - } -} - -// Outputs a `Summary` protocol buffer with audio. -// -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. 
-// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. -// -// Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AudioSummaryV2", - Input: []tf.Input{ - tag, tensor, sample_rate, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Replaces the contents of the table with the specified keys and values. // // The tensor `keys` must be of the same type as the keys of the table. @@ -26311,6 +26276,95 @@ func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtyp return op.Output(0) } +// Partitions `data` into `num_partitions` tensors using indices from `partitions`. +// +// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` +// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` +// are placed in `outputs[i]` in lexicographic order of `js`, and the first +// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. +// In detail, +// +// ```python +// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] +// +// outputs[i] = pack([data[js, ...] for js if partitions[js] == i]) +// ``` +// +// `data.shape` must start with `partitions.shape`. 
+// +// For example: +// +// ```python +// # Scalar partitions. +// partitions = 1 +// num_partitions = 2 +// data = [10, 20] +// outputs[0] = [] # Empty with shape [0, 2] +// outputs[1] = [[10, 20]] +// +// # Vector partitions. +// partitions = [0, 0, 1, 1, 0] +// num_partitions = 2 +// data = [10, 20, 30, 40, 50] +// outputs[0] = [10, 20, 50] +// outputs[1] = [30, 40] +// ``` +// +// See `dynamic_stitch` for an example on how to merge partitions back. +// +//
+// +//
+// +// Arguments: +// +// partitions: Any shape. Indices in the range `[0, num_partitions)`. +// num_partitions: The number of partitions to output. +func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_partitions": num_partitions} + opspec := tf.OpSpec{ + Type: "DynamicPartition", + Input: []tf.Input{ + data, partitions, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("DynamicPartition", err) + return + } + return outputs +} + +// Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object. +// +// Arguments: +// sparse_indices: 2-D. The `indices` of the `SparseTensor`. +// sparse_values: 1-D. The `values` of the `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the `SparseTensor`. +func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) (serialized_sparse tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SerializeSparse", + Input: []tf.Input{ + sparse_indices, sparse_values, sparse_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Table initializer that takes two tensors for keys and values respectively. // // Arguments: @@ -26453,6 +26507,105 @@ func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } +// Creates a dataset that asynchronously prefetches elements from `input_dataset`. +// +// Arguments: +// +// buffer_size: The maximum number of elements to buffer in an iterator over +// this dataset. 
+// +// +func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "PrefetchDataset", + Input: []tf.Input{ + input_dataset, buffer_size, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs a `Summary` protocol buffer with a tensor and per-plugin data. +// +// Arguments: +// tag: A string attached to this summary. Used for organization in TensorBoard. +// tensor: A tensor to serialize. +// serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin +// data. +func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorSummaryV2", + Input: []tf.Input{ + tag, tensor, serialized_summary_metadata, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. +type AudioSummaryV2Attr func(optionalAttr) + +// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. +// +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { + return func(m optionalAttr) { + m["max_outputs"] = value + } +} + +// Outputs a `Summary` protocol buffer with audio. +// +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. 
+// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// +// Arguments: +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AudioSummaryV2", + Input: []tf.Input{ + tag, tensor, sample_rate, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the gradient for the sqrt of `x` wrt its input. // // Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy` @@ -26589,156 +26742,3 @@ func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...Matr op := scope.AddOperation(opspec) return op.Output(0) } - -// SumAttr is an optional argument to Sum. -type SumAttr func(optionalAttr) - -// SumKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SumKeepDims(value bool) SumAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the sum of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `reduction_indices`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_indices`. 
If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// reduction_indices: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. -func Sum(scope *Scope, input tf.Output, reduction_indices tf.Output, optional ...SumAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Sum", - Input: []tf.Input{ - input, reduction_indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Partitions `data` into `num_partitions` tensors using indices from `partitions`. -// -// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` -// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` -// are placed in `outputs[i]` in lexicographic order of `js`, and the first -// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. -// In detail, -// -// ```python -// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] -// -// outputs[i] = pack([data[js, ...] for js if partitions[js] == i]) -// ``` -// -// `data.shape` must start with `partitions.shape`. -// -// For example: -// -// ```python -// # Scalar partitions. -// partitions = 1 -// num_partitions = 2 -// data = [10, 20] -// outputs[0] = [] # Empty with shape [0, 2] -// outputs[1] = [[10, 20]] -// -// # Vector partitions. -// partitions = [0, 0, 1, 1, 0] -// num_partitions = 2 -// data = [10, 20, 30, 40, 50] -// outputs[0] = [10, 20, 50] -// outputs[1] = [30, 40] -// ``` -// -// See `dynamic_stitch` for an example on how to merge partitions back. -// -//
-// -//
-// -// Arguments: -// -// partitions: Any shape. Indices in the range `[0, num_partitions)`. -// num_partitions: The number of partitions to output. -func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_partitions": num_partitions} - opspec := tf.OpSpec{ - Type: "DynamicPartition", - Input: []tf.Input{ - data, partitions, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("DynamicPartition", err) - return - } - return outputs -} - -// Serialize a `SparseTensor` into a string 3-vector (1-D `Tensor`) object. -// -// Arguments: -// sparse_indices: 2-D. The `indices` of the `SparseTensor`. -// sparse_values: 1-D. The `values` of the `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the `SparseTensor`. -func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) (serialized_sparse tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SerializeSparse", - Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the reciprocal of x element-wise. -// -// DEPRECATED at GraphDef version 17: Use Reciprocal -// -// I.e., \\(y = 1 / x\\). -func Inv(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Inv", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} -- GitLab From e7ab55b01f25bc1c9023dcc9510667ea480c6186 Mon Sep 17 00:00:00 2001 From: Ian Langmore Date: Fri, 6 Oct 2017 08:14:46 -0700 Subject: [PATCH 101/909] SinhArcsinh distributions modified so that their skewness is symmetric. 
Also, some doc-fixes/changes, and make SinhArcsinh bijector have same None kwargs and naming scheme as the distributions PiperOrigin-RevId: 171294037 --- .../bijectors/sinh_arcsinh_bijector_test.py | 8 ++++- .../python/kernel_tests/sinh_arcsinh_test.py | 16 ++++++++++ .../vector_sinh_arcsinh_diag_test.py | 16 ++++++++++ .../python/ops/bijectors/sinh_arcsinh_impl.py | 20 ++++++++----- .../distributions/python/ops/sinh_arcsinh.py | 29 ++++++++++++------- .../python/ops/vector_sinh_arcsinh_diag.py | 28 ++++++++++++------ 6 files changed, 89 insertions(+), 28 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py index 230dd93a2a..172c180a44 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_bijector_test.py @@ -41,7 +41,7 @@ class SinhArcsinhBijectorTest(test.TestCase): tailweight=tailweight, event_ndims=1, validate_args=True) - self.assertEqual("sinh_arcsinh", bijector.name) + self.assertEqual("SinhArcsinh", bijector.name) x = np.array([[[-2.01], [2.], [1e-4]]]).astype(np.float32) y = np.sinh((np.arcsinh(x) + skewness) * tailweight) self.assertAllClose(y, bijector.forward(x).eval()) @@ -170,6 +170,12 @@ class SinhArcsinhBijectorTest(test.TestCase): with self.assertRaisesOpError("not positive"): SinhArcsinh(tailweight=0., validate_args=True).forward(1.0).eval() + def testDefaultDtypeIsFloat32(self): + with self.test_session(): + bijector = SinhArcsinh() + self.assertEqual(bijector.tailweight.dtype, np.float32) + self.assertEqual(bijector.skewness.dtype, np.float32) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/sinh_arcsinh_test.py b/tensorflow/contrib/distributions/python/kernel_tests/sinh_arcsinh_test.py index 
8ea3a59255..88b48736dd 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/sinh_arcsinh_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/sinh_arcsinh_test.py @@ -200,6 +200,22 @@ class SinhArcsinhTest(test.TestCase): sasnorm_samps = sess.run(sasnorm.sample(10000, seed=4)) np.testing.assert_array_less(loc, sasnorm_samps.mean(axis=0)) + def test_pdf_reflected_for_negative_skewness(self): + with self.test_session() as sess: + sas_pos_skew = ds.SinhArcsinh( + loc=0., + scale=1., + skewness=2., + validate_args=True) + sas_neg_skew = ds.SinhArcsinh( + loc=0., + scale=1., + skewness=-2., + validate_args=True) + x = np.linspace(-2, 2, num=5).astype(np.float32) + self.assertAllClose( + *sess.run([sas_pos_skew.prob(x), sas_neg_skew.prob(x[::-1])])) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/vector_sinh_arcsinh_diag_test.py b/tensorflow/contrib/distributions/python/kernel_tests/vector_sinh_arcsinh_diag_test.py index a7140cd98b..a5d837d454 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/vector_sinh_arcsinh_diag_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/vector_sinh_arcsinh_diag_test.py @@ -251,6 +251,22 @@ class VectorSinhArcsinhDiagTest(test_util.VectorDistributionTestHelpers, center=0.15, rtol=0.1) + def test_pdf_reflected_for_negative_skewness(self): + with self.test_session() as sess: + sas_pos_skew = ds.VectorSinhArcsinhDiag( + loc=[0.], + scale_identity_multiplier=1., + skewness=2., + validate_args=True) + sas_neg_skew = ds.VectorSinhArcsinhDiag( + loc=[0.], + scale_identity_multiplier=1., + skewness=-2., + validate_args=True) + x = np.linspace(-2, 2, num=5).astype(np.float32).reshape(5, 1) + self.assertAllClose( + *sess.run([sas_pos_skew.prob(x), sas_neg_skew.prob(x[::-1])])) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py 
b/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py index dac3d812ee..3a75e4ae94 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/sinh_arcsinh_impl.py @@ -89,18 +89,18 @@ class SinhArcsinh(bijector.Bijector): """ def __init__(self, - skewness=0., - tailweight=1., + skewness=None, + tailweight=None, event_ndims=0, validate_args=False, - name="sinh_arcsinh"): + name="SinhArcsinh"): """Instantiates the `SinhArcsinh` bijector. Args: - skewness: Skewness parameter. Float-type `Tensor`. + skewness: Skewness parameter. Float-type `Tensor`. Default is `0` + of type `float32`. tailweight: Tailweight parameter. Positive `Tensor` of same `dtype` as - `skewness` - and broadcastable `shape`. + `skewness` and broadcastable `shape`. Default is `1` of type `float32`. event_ndims: Python scalar indicating the number of dimensions associated with a particular draw from the distribution. validate_args: Python `bool` indicating whether arguments should be @@ -111,8 +111,12 @@ class SinhArcsinh(bijector.Bijector): self._name = name self._validate_args = validate_args with self._name_scope("init", values=[skewness, tailweight]): - self._skewness = ops.convert_to_tensor(skewness, name="skewness") - self._tailweight = ops.convert_to_tensor(tailweight, name="tailweight") + tailweight = 1. if tailweight is None else tailweight + skewness = 0. 
if skewness is None else skewness + self._skewness = ops.convert_to_tensor( + skewness, name="skewness") + self._tailweight = ops.convert_to_tensor( + tailweight, name="tailweight", dtype=self._skewness.dtype) check_ops.assert_same_float_dtype([self._skewness, self._tailweight]) if validate_args: self._tailweight = control_flow_ops.with_dependencies([ diff --git a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py index cdf81526da..b05f15771a 100644 --- a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py +++ b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py @@ -51,8 +51,9 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): `(loc, scale, skewness, tailweight)`, via the relation: ``` - Y := loc + scale * F(Z) * (2 / F(2)) + Y := loc + scale * F(Z) * (2 / F_0(2)) F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight ) + F_0(Z) := Sinh( Arcsinh(Z) * tailweight ) ``` This distribution is similar to the location-scale transformation @@ -61,7 +62,7 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): * If `skewness = 0` and `tailweight = 1` (the defaults), `F(Z) = Z`, and then `Y = L(Z)` exactly. * `loc` is used in both to shift the result by a constant factor. - * Our definition of `C` ensures that + * The multiplication of `scale` by `2 / F_0(2)` ensures that if `skewness = 0` `P[Y - loc <= 2 * scale] = P[L(Z) - loc <= 2 * scale]`. Thus it can be said that the weights in the tails of `Y` and `L(Z)` beyond `loc + 2 * scale` are the same. @@ -84,12 +85,12 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): `|Z| >> (|skewness| * tailweight)**tailweight`, we have `Y approx 0.5 Z**tailweight e**(sign(Z) skewness * tailweight)`. 
- To see the argument about `C` and quantiles, note that + To see the argument regarding multiplying `scale` by `2 / F_0(2)`, ``` - P[(Y - loc) / scale <= 2] = P[F(Z) <= 2 * scale / C] - = P[Z <= F^{-1}(2 * scale / C)] - = P[Z <= 2]. + P[(Y - loc) / scale <= 2] = P[F(Z) * (2 / F_0(2)) <= 2] + = P[F(Z) <= F_0(2)] + = P[Z <= 2] (if F = F_0). ``` """ @@ -101,7 +102,7 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): distribution=None, validate_args=False, allow_nan_stats=True, - name="MultivariateNormalLinearOperator"): + name="SinhArcsinh"): """Construct SinhArcsinh distribution on `(-inf, inf)`. Arguments `(loc, scale, skewness, tailweight)` must have broadcastable shape @@ -138,6 +139,7 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): dtype = loc.dtype scale = ops.convert_to_tensor(scale, name="scale", dtype=dtype) tailweight = 1. if tailweight is None else tailweight + has_default_skewness = skewness is None skewness = 0. if skewness is None else skewness tailweight = ops.convert_to_tensor( tailweight, name="tailweight", dtype=dtype) @@ -149,7 +151,8 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): # Recall, with Z a random variable, # Y := loc + C * F(Z), # F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight ) - # C := 2 * scale / F(2) + # F_0(Z) := Sinh( Arcsinh(Z) * tailweight ) + # C := 2 * scale / F_0(2) if distribution is None: distribution = normal.Normal( loc=array_ops.zeros([], dtype=dtype), @@ -164,9 +167,15 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): # Make the SAS bijector, 'F'. f = bijectors.SinhArcsinh( skewness=skewness, tailweight=tailweight, event_ndims=0) + if has_default_skewness: + f_noskew = f + else: + f_noskew = bijectors.SinhArcsinh( + skewness=skewness.dtype.as_numpy_dtype(0.), + tailweight=tailweight, event_ndims=0) - # Make the Affine bijector, Z --> loc + C * Z. 
- c = 2 * scale / f.forward(ops.convert_to_tensor(2, dtype=dtype)) + # Make the Affine bijector, Z --> loc + scale * Z (2 / F_0(2)) + c = 2 * scale / f_noskew.forward(ops.convert_to_tensor(2, dtype=dtype)) affine = bijectors.Affine( shift=loc, scale_identity_multiplier=c, diff --git a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py index 488724e80c..544a871070 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""SinhArcsinh transformation of a distribution.""" +"""Multi-dimensional (Vector) SinhArcsinh transformation of a distribution.""" from __future__ import absolute_import from __future__ import division @@ -52,8 +52,9 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): matrix multiplication): ``` - Y := loc + scale @ F(Z) * (2 / F(2)) + Y := loc + scale @ F(Z) * (2 / F_0(2)) F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight ) + F_0(Z) := Sinh( Arcsinh(Z) * tailweight ) ``` This distribution is similar to the location-scale transformation @@ -62,7 +63,7 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): * If `skewness = 0` and `tailweight = 1` (the defaults), `F(Z) = Z`, and then `Y = L(Z)` exactly. * `loc` is used in both to shift the result by a constant factor. - * Our definition of `C` ensures that + * The multiplication of `scale` by `2 / F_0(2)` ensures that if `skewness = 0` `P[Y - loc <= 2 * scale] = P[L(Z) - loc <= 2 * scale]`. Thus it can be said that the weights in the tails of `Y` and `L(Z)` beyond `loc + 2 * scale` are the same. 
@@ -85,12 +86,12 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): `|Z| >> (|skewness| * tailweight)**tailweight`, we have `Y approx 0.5 Z**tailweight e**(sign(Z) skewness * tailweight)`. - To see the argument about `C` and quantiles, note that + To see the argument regarding multiplying `scale` by `2 / F_0(2)`, ``` - P[(Y - loc) / scale <= 2] = P[F(Z) <= 2 * scale / C] - = P[Z <= F^{-1}(2 * scale / C)] - = P[Z <= 2]. + P[(Y - loc) / scale <= 2] = P[F(Z) * (2 / F_0(2)) <= 2] + = P[F(Z) <= F_0(2)] + = P[Z <= 2] (if F = F_0). ``` """ @@ -171,12 +172,14 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): ]): loc = ops.convert_to_tensor(loc, name="loc") if loc is not None else loc tailweight = 1. if tailweight is None else tailweight + has_default_skewness = skewness is None skewness = 0. if skewness is None else skewness # Recall, with Z a random variable, # Y := loc + C * F(Z), # F(Z) := Sinh( (Arcsinh(Z) + skewness) * tailweight ) - # C := 2 * scale / F(2) + # F_0(Z) := Sinh( Arcsinh(Z) * tailweight ) + # C := 2 * scale / F_0(2) # Construct shapes and 'scale' out of the scale_* and loc kwargs. # scale_linop is only an intermediary to: @@ -213,9 +216,16 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): tailweight, dtype=dtype, name="tailweight") f = bijectors.SinhArcsinh( skewness=skewness, tailweight=tailweight, event_ndims=1) + if has_default_skewness: + f_noskew = f + else: + f_noskew = bijectors.SinhArcsinh( + skewness=skewness.dtype.as_numpy_dtype(0.), + tailweight=tailweight, event_ndims=0) # Make the Affine bijector, Z --> loc + C * Z. 
- c = 2 * scale_diag_part / f.forward(ops.convert_to_tensor(2, dtype=dtype)) + c = 2 * scale_diag_part / f_noskew.forward( + ops.convert_to_tensor(2, dtype=dtype)) affine = bijectors.Affine( shift=loc, scale_diag=c, validate_args=validate_args, event_ndims=1) -- GitLab From 9d8346a1204d05b2ab16c169a6a6077167fe162a Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Fri, 6 Oct 2017 08:15:48 -0700 Subject: [PATCH 102/909] [Grappler] Reorder cast and transpose. A common pattern after the layout optimizer is casting an uint8 NHWC image to float before transposing it to NCHW. It is beneficial to reorder the cast and the transpose to make the transpose process smaller amount of data. This optimization converts Transpose(Cast(image, dst_type), perm) to Cast(Transpose(image, perm), dst_type) when sizeof(image.type) < sizeof(dst_type). PiperOrigin-RevId: 171294111 --- .../optimizers/arithmetic_optimizer.cc | 81 +++++++++++++++++++ .../optimizers/arithmetic_optimizer_test.cc | 66 +++++++++++++++ 2 files changed, 147 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 2d7cf3b182..343820de71 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/tensor_coding.h" +#include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { namespace grappler { @@ -274,6 +275,26 @@ static bool SimplyReordersData(const NodeDef& node) { return node.op() == "Transpose"; } +// Returns the data type in attribute `attr_name` of `node`. If that attribute +// doesn't exist, returns DT_INVALID. 
+static DataType GetDataTypeFromAttr(const NodeDef& node, + const string& attr_name) { + if (!node.attr().count(attr_name)) { + return DT_INVALID; + } + const auto& attr = node.attr().at(attr_name); + if (attr.value_case() != AttrValue::kType) { + return DT_INVALID; + } + return attr.type(); +} + +static bool IsNumberType(DataType dtype) { + DataTypeVector number_types = NumberTypes(); + return std::find(number_types.begin(), number_types.end(), dtype) != + number_types.end(); +} + string ArithmeticOptimizer::TrySimplifyAndReplaceUses( const NodeDef* node, GraphDef* graph_def, NodeMap* node_map, std::vector* new_nodes) const { @@ -320,6 +341,66 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } } + if (node->op() == "Transpose") { + // Reorder Cast and Transpose if beneficial. + // + // A common pattern after the layout optimizer is casting an uint8 NHWC + // image to float before transposing it to NCHW. It is beneficial to reorder + // the cast and the transpose to make the transpose process smaller amount + // of data. This optimization converts + // Transpose(Cast(image, dst_type), perm) + // to + // Cast(Transpose(image, perm), dst_type) + // when sizeof(image.type) < sizeof(dst_type). + // + // TODO(jingyue): This optimization can be generalized to a cast followed by + // a chain of ops that merely reorder elements (e.g. Reshape and + // DepthToSpace). + const NodeDef* transpose = node; + string dontcare; + string device; + // This optimization can be dangerous on devices other than CPU and GPU. The + // transpose might not be implemented for image.type, or might be slower + // with image.type than with dst_type. 
+ if (DeviceNameUtils::SplitDeviceName(transpose->device(), &dontcare, + &device) && + (StringPiece(device).contains(DEVICE_CPU) || + StringPiece(device).contains(DEVICE_GPU))) { + const NodeDef* cast = node_map->GetNode(transpose->input(0)); + if (cast->op() == "Cast") { + const NodeDef* input = node_map->GetNode(cast->input(0)); + const DataType src_type = GetDataTypeFromAttr(*cast, "SrcT"); + const DataType dst_type = GetDataTypeFromAttr(*cast, "DstT"); + if (IsNumberType(src_type) && IsNumberType(dst_type) && + DataTypeSize(src_type) < DataTypeSize(dst_type)) { + NodeDef* new_transpose = graph_def->add_node(); + *new_transpose = *transpose; + new_transpose->set_name(transpose->name() + "_" + + DataTypeString(src_type)); + (*new_transpose->mutable_attr())["T"].set_type(src_type); + node_map->AddNode(new_transpose->name(), new_transpose); + + new_transpose->set_input(0, cast->input(0)); + node_map->AddOutput(input->name(), new_transpose->name()); + node_map->AddOutput(NodeName(new_transpose->input(1)), + new_transpose->name()); + + NodeDef* new_cast = graph_def->add_node(); + *new_cast = *cast; + new_cast->set_name(cast->name() + "_new"); + node_map->AddNode(new_cast->name(), new_cast); + + new_cast->set_input(0, new_transpose->name()); + node_map->AddOutput(new_transpose->name(), new_cast->name()); + + new_nodes->push_back(new_transpose); + new_nodes->push_back(new_cast); + return new_cast->name(); + } + } + } + } + // Fold a multiply of a scalar into the following convolution. This folding // can jump across nodes that merely reorders data (such as reshape and // transpose). 
For example, we can optimize diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index c8bca4282b..b3405646eb 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -109,6 +109,72 @@ TEST_F(ArithmeticOptimizerTest, CombineReshapes) { [](const NodeDef& node) { return node.op() == "Reshape"; })); } +TEST_F(ArithmeticOptimizerTest, ReorderTransposeCast) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice("/gpu:0"); + Output nhwc_uint8 = + ops::Placeholder(s, DT_UINT8, ops::Placeholder::Shape({8, 28, 28, 3})); + Output nhwc_fp32 = ops::Cast(s, nhwc_uint8, DT_FLOAT); + Output nchw_fp32 = + ops::Transpose(s, nhwc_fp32, ops::Const(s, {0, 3, 1, 2}, {4})); + Output outputs = ops::Identity(s.WithOpName("outputs"), nchw_fp32); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); + + item.graph = output; + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + + const NodeDef* transpose_node = nullptr; + for (const NodeDef& node : output.node()) { + if (node.op() == "Transpose") { + EXPECT_EQ(transpose_node, nullptr); + EXPECT_EQ(DT_UINT8, node.attr().at("T").type()); + transpose_node = &node; + } + } + EXPECT_NE(transpose_node, nullptr); + + for (const NodeDef& node : output.node()) { + if (node.op() == "Cast") { + EXPECT_EQ(NodeName(node.input(0)), transpose_node->name()); + } + } +} + +TEST_F(ArithmeticOptimizerTest, NoReorderTransposeCast) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice("/gpu:0"); + Output nhwc_fp32 = + ops::Placeholder(s, DT_FLOAT, ops::Placeholder::Shape({8, 28, 28, 3})); + Output nhwc_uint8 = ops::Cast(s, nhwc_fp32, DT_UINT8); + Output nchw_uint8 = + ops::Transpose(s, nhwc_uint8, 
ops::Const(s, {0, 3, 1, 2}, {4})); + Output outputs = ops::Identity(s.WithOpName("outputs"), nchw_uint8); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); + + item.graph = output; + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + + int num_transposes = 0; + for (const NodeDef& node : output.node()) { + if (node.op() == "Transpose") { + EXPECT_EQ(DT_UINT8, node.attr().at("T").type()); + EXPECT_EQ(node.input(0), "Cast"); + ++num_transposes; + } + } + EXPECT_EQ(1, num_transposes); +} + TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposes) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output inputs_shape = -- GitLab From 2226790bbf19638eb3535abe521df7b16a109147 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 6 Oct 2017 08:23:46 -0700 Subject: [PATCH 103/909] Internal Change PiperOrigin-RevId: 171294796 --- tensorflow/leakr_file_type_recipe.ftrcp | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tensorflow/leakr_file_type_recipe.ftrcp diff --git a/tensorflow/leakr_file_type_recipe.ftrcp b/tensorflow/leakr_file_type_recipe.ftrcp new file mode 100644 index 0000000000..0521a084c7 --- /dev/null +++ b/tensorflow/leakr_file_type_recipe.ftrcp @@ -0,0 +1,30 @@ +name: "TensorFlow filetype recipes" +desc: "Copybara leakr checks, used by copy.bara.sky." + +file_config:{ + name: "Image labels text file skip" + desc: "Generic text files." + pattern: ".*labels.txt" + compression: COMPRESSION_NONE + scan_mode: SCAN_SKIP + file_group: FG_PLAIN_TEXT_GENERIC +} + +file_config:{ + name: "[Mediafiles] Graphics" + desc: "All media files that are images, graphics and icons." 
+ ext: "bmp" + ext: "gif" + ext: "icns" + ext: "ico" + ext: "jpeg" + ext: "jpg" + ext: "png" + ext: "svg" + ext: "tga" + ext: "tiff" + ext: "webp" + compression: COMPRESSION_NONE + scan_mode: SCAN_SKIP + file_group: FG_MEDIA_GRAPHICS +} \ No newline at end of file -- GitLab From fb0df6d9de9acb1d598c0400a705d16e8cd4f693 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 6 Oct 2017 09:12:22 -0700 Subject: [PATCH 104/909] [XLA:LLVM] Allow LLVM AA to work cross-functions. Create our AA domain with createAliasScopeDomain rather than createAnonymousAliasScopeDomain. This way inlining does not duplicate the domain (and thus prevent us from reasoning about loads/stores that cross the inlined function boundary). PiperOrigin-RevId: 171299706 --- .../compiler/xla/service/llvm_ir/alias_analysis.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc index 5e28e37600..bdddc232ef 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc @@ -92,7 +92,16 @@ void AliasAnalysis::AddAliasingInformationToIrArray(const HloInstruction& hlo, llvm::MDNode* AliasAnalysis::GetAliasDomain() { llvm::MDBuilder metadata_builder(*context_); if (alias_domain_ == nullptr) { - alias_domain_ = metadata_builder.createAnonymousAliasScopeDomain(); + // We use createAliasScopeDomain rather than createAnonymousAliasScopeDomain + // so that when functions get inlined, we continue using the one domain, + // rather than duplicating it (and thus having two AA domains in one + // function). + // + // A side-effect of this is that if you ever compile two HLO modules in the + // same LLVM module, they'll have the same alias scope domain. This isn't a + // problem because the two HLO modules will never interact with one another. 
+ alias_domain_ = + metadata_builder.createAliasScopeDomain("XLA global AA domain"); } return alias_domain_; } -- GitLab From 3251bc07927c6a60916fc274e11445d42e5ec193 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 09:24:17 -0700 Subject: [PATCH 105/909] Fixed typo in DynamicRnnEstimator __init__ documentation. PiperOrigin-RevId: 171300981 --- .../learn/python/learn/estimators/dynamic_rnn_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py index 1724d7599d..69440e823e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py @@ -639,7 +639,7 @@ class DynamicRnnEstimator(estimator.Estimator): ValueError: `problem_type` is not one of `ProblemType.LINEAR_REGRESSION` or `ProblemType.CLASSIFICATION`. ValueError: `problem_type` is `ProblemType.CLASSIFICATION` but - `num_classes` is not specifieProblemType + `num_classes` is not specified. ValueError: `prediction_type` is not one of `PredictionType.MULTIPLE_VALUE` or `PredictionType.SINGLE_VALUE`. """ -- GitLab From 2daa40f9d096d47fc3add05a36fb7e41a00ba69d Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Fri, 6 Oct 2017 09:35:06 -0700 Subject: [PATCH 106/909] Fix transpose bug for large dimension. Add random tests of large shapes for better coverage. Update transpose benchmark with cases that swap one small dimension with one large dimension. 
PiperOrigin-RevId: 171302097 --- tensorflow/core/kernels/conv_ops_gpu_3.cu.cc | 127 +++++++++++++++- tensorflow/python/BUILD | 20 +++ .../python/kernel_tests/transpose_op_test.py | 74 +++++++++ tensorflow/python/ops/conv2d_benchmark.py | 141 ++++++++++++++++++ tensorflow/python/ops/transpose_benchmark.py | 48 ++++-- 5 files changed, 393 insertions(+), 17 deletions(-) create mode 100644 tensorflow/python/ops/conv2d_benchmark.py diff --git a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc index 3d4670c9ba..9083626fbf 100644 --- a/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc +++ b/tensorflow/core/kernels/conv_ops_gpu_3.cu.cc @@ -272,6 +272,88 @@ __global__ void SwapDimension1And2InTensor3UsingTiles(const T* input, } } +// Use shared memory tiles to swap dimension-1 and dimension-2 of a 3D tensor +// when only one of the dimension sizes is smaller than 16, +// where dimensions are zero-based: output[i][j][k] = input[i][k][j]. +// +// small_dim = the_smaller_dimension_size +// large_dim = the_larger_dimension_size +// tile_num_per_block = blockDim.x +// kTileLength = small_dim +// +// Each thread block operates on a single rectangle tile, where its width is +// kTileLength (we currently set it to 64) and its height is small_dim, +// We set the thread block's X dimension to be tile_num_per_block, and its Y +// and Z to be one. +template +__global__ void SwapDimension1And2InTensor3SmallDim(const T* input, + int batch_per_block, + Dimension<3> input_dims, + T* output) { + // TODO(yangzihao) avoid share memory bank conflict. + __shared__ T shared_memory_tile[ShmemSize]; + + eigen_assert(blockDim.y == 1); + eigen_assert(blockDim.z == 1); + eigen_assert(gridDim.z == 1); + + int block_offset = blockIdx.x * blockDim.x; + + int x = threadIdx.x; + int tile_height = blockDim.x; + + // Get tile height, width, and thread/block origin indices. + int small_dim = SmallDim2 ? input_dims[2] : input_dims[1]; + int large_dim = SmallDim2 ? 
input_dims[1] : input_dims[2]; + + int global_offset = small_dim * large_dim * (blockIdx.y * batch_per_block) + + (SmallDim2 ? block_offset * small_dim : block_offset); + if (global_offset >= (input_dims[0] * input_dims[1] * input_dims[2])) return; + + for (int batch = 0; batch < batch_per_block; ++batch) { + int block_origin_idx = + small_dim * large_dim * (blockIdx.y * batch_per_block + batch); + int thread_origin_idx = + block_origin_idx + + (SmallDim2 ? block_offset * small_dim : block_offset) + x; + + if (block_offset + blockDim.x > large_dim) { + tile_height = large_dim - block_offset; + } + + __syncthreads(); + + // Load a continuous memory region to shared memory tile. + if (x < tile_height) { + for (int y = 0; y < small_dim; y++) { + int shmem_index = + SmallDim2 ? (x + y * tile_height) : (x * small_dim + y); + shared_memory_tile[shmem_index] = + ldg(input + thread_origin_idx + + y * (SmallDim2 ? tile_height : large_dim)); + } + } + + __syncthreads(); + + // Get block origin index for output array. + int output_block_offset = block_origin_idx; + int output_block_idx = SmallDim2 ? block_offset : block_offset * small_dim; + int output_block_origin_idx = output_block_offset + output_block_idx; + + // Store the tranposed memory region in shared memory to device. + if (x < tile_height) { + for (int y = 0; y < small_dim; y++) { + int output_idx = output_block_origin_idx + x + + y * (SmallDim2 ? large_dim : tile_height); + int shmem_index = + SmallDim2 ? (x * small_dim + y) : (x + y * tile_height); + output[output_idx] = shared_memory_tile[shmem_index]; + } + } + } +} + // A Cuda custom kernel that convert input to output, given proper padding on // the left and the top. The padded value is zero. template @@ -420,25 +502,62 @@ template void RunSwapDimension1And2InTensor3(const GPUDevice& d, const T* input, const Dimension<3>& input_dims, T* output) { // If both dimensions are not trivial, use tiles for the actual swapping. 
+ // If one dimension is trivial, use SmallDim kernel for swapping. // Otherwise, the trivial swapping relying on the ldg cache is more efficient. static const int kMinDimensionToUseTiles = 16; bool use_tiles = (input_dims[1] >= kMinDimensionToUseTiles && input_dims[2] >= kMinDimensionToUseTiles); + bool use_small_dim = ((input_dims[1] >= kMinDimensionToUseTiles && + input_dims[2] < kMinDimensionToUseTiles)) || + ((input_dims[1] < kMinDimensionToUseTiles && + input_dims[2] >= kMinDimensionToUseTiles)); + static const int NumSubTiles = 8; + if (use_tiles) { - // We get best performance when TileSize is the number of threads in a warp - // (32 on our GPUs) and NumSubTiles is 8, so our block size is 8 * 32 = 256 - // threads. static const int TileSize = 32; - static const int NumSubTiles = 8; Dimension<3> input_dims_in_tiles = { input_dims[0], (input_dims[1] + TileSize - 1) / TileSize, (input_dims[2] + TileSize - 1) / TileSize, }; int total_tiles_count = input_dims_in_tiles[0] * input_dims_in_tiles[1] * input_dims_in_tiles[2]; + // We get best performance when TileSize is the number of threads in a warp + // (32 on our GPUs) and NumSubTiles is 8, so our block size is 8 * 32 = 256 + // threads. SwapDimension1And2InTensor3UsingTiles<<< total_tiles_count, dim3(TileSize, NumSubTiles), 0, d.stream()>>>( input, input_dims, output); + } else if (use_small_dim) { + // When only one of the dimensions is smaller than kMinDimensionToUseTiles, + // we use one block to process a rectangle region with the size of + // kTileLength * small_dim. We found that when set kTileLength to 64 on + // TitanX Maxwell GPU, it achieves the best performance. + // large_dim + // +---------------...--------+ + // | | | | + // small_dim | | ... 
| | + // | | | | + // +--------------...---------+ + // \----- ------/ \- -/ + // V V + // kTileLength(tile_height) tile_height + static const int kTileLength = 64; + static const int kGridDimY = 65535; + int large_dim = std::max(input_dims[2], input_dims[1]); + int tile_num_per_block = (large_dim + kTileLength - 1) / kTileLength; + int grid_dim_y = std::min(input_dims[0], kGridDimY); + int batch_per_block = (input_dims[0] + grid_dim_y - 1) / grid_dim_y; + if (input_dims[2] < input_dims[1]) { + SwapDimension1And2InTensor3SmallDim< + T, kTileLength * kMinDimensionToUseTiles, true> + <<>>(input, batch_per_block, input_dims, output); + } else { + SwapDimension1And2InTensor3SmallDim< + T, kTileLength * kMinDimensionToUseTiles, false> + <<>>(input, batch_per_block, input_dims, output); + } } else { int total_element_count = input_dims[0] * input_dims[1] * input_dims[2]; CudaLaunchConfig config = GetCudaLaunchConfig(total_element_count, d); diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index ab3b851ef8..bdbad14660 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -4060,6 +4060,26 @@ cuda_py_test( main = "ops/concat_benchmark.py", ) +cuda_py_test( + name = "conv2d_benchmark", + size = "large", + srcs = ["ops/conv2d_benchmark.py"], + additional_deps = [ + ":client", + ":client_testlib", + ":control_flow_ops", + ":framework_for_generated_wrappers", + ":nn_ops", + ":platform", + ":platform_benchmark", + ":random_ops", + ":variables", + "//third_party/py/numpy", + "//tensorflow/core:protos_all_py", + ], + main = "ops/conv2d_benchmark.py", +) + cuda_py_test( name = "split_benchmark", srcs = ["ops/split_benchmark.py"], diff --git a/tensorflow/python/kernel_tests/transpose_op_test.py b/tensorflow/python/kernel_tests/transpose_op_test.py index 570fa79944..9e1f83395b 100644 --- a/tensorflow/python/kernel_tests/transpose_op_test.py +++ b/tensorflow/python/kernel_tests/transpose_op_test.py @@ -229,6 +229,80 @@ class TransposeTest(test.TestCase): 
self.assertAllEqual(np_ans, tf_ans) self.assertShapeEqual(np_ans, y) + def testLargeSizeGPU(self): + # If no GPU available, skip the test + if not test.is_gpu_available(cuda_only=True): + return + + large_shapes = [[1000000, 31, 3], [3, 1000000, 31], [3, 31, 1000000], + [10000, 310, 3], [3, 10000, 310], [3, 310, 10000], + [2, 1000, 1000], [1000, 2, 1000], [1000, 1000, 2]] + perms = [[0, 2, 1]] * 9 + + for input_shape, perm in zip(large_shapes, perms): + total_size = np.prod(input_shape) + inp = np.arange(1, total_size + 1, dtype=np.float32).reshape(input_shape) + np_ans = self._np_transpose(inp, perm) + with self.test_session(use_gpu=True): + inx = ops.convert_to_tensor(inp) + y = array_ops.transpose(inx, perm) + tf_ans = y.eval() + self.assertAllEqual(np_ans, tf_ans) + self.assertShapeEqual(np_ans, y) + + def testRandomizedSmallDimLargeSizeGPU(self): + # If no GPU available, skip the test + if not test.is_gpu_available(cuda_only=True): + return + + # Draw 10 random shapes with large dimension sizes. 
+ # 40% prob to generate dim[0] size within [1, 2047] + # 40% prob to generate dim[0] size within [2048, 4095] + # 20% prob to generate dim[0] size within [4096, 100000] + # 50% prob to use dim[1] as the small dim (<16) + num_samples = 10 + total_size = 500000 + small_size_limit = 2048 + large_size_limit = 95905 + small_size_percentage = 0.4 + medium_size_percentage = 0.4 + large_size_percentage = 0.2 + perms = [[0, 2, 1]] * num_samples + dim_zero_sizes = [] + dim_zero_sizes += list( + np.random.randint( + small_size_limit, size=int(small_size_percentage * num_samples)) + + 1) + dim_zero_sizes += list( + np.random.randint( + small_size_limit, size=int(medium_size_percentage * num_samples)) + + small_size_limit) + dim_zero_sizes += list( + np.random.randint( + large_size_limit, size=int(large_size_percentage * num_samples)) + + small_size_limit * 2) + input_shapes = [] + small_dim_limit = 16 + for dim_zero_size in dim_zero_sizes: + small_dim_size = np.random.randint(small_dim_limit - 1) + 1 + large_dim_size = int( + total_size / dim_zero_size / small_dim_size) + small_dim_limit + input_shapes += ([[dim_zero_size, small_dim_size, large_dim_size]] + if np.random.randint(2) else + [[dim_zero_size, large_dim_size, small_dim_size]]) + + for input_shape, perm in zip(input_shapes, perms): + # generate input data with random ints from 0 to 9. 
+ inp = np.random.randint(10, size=input_shape) + np_ans = self._np_transpose(inp, perm) + with self.test_session(use_gpu=True): + inx = ops.convert_to_tensor(inp) + y = array_ops.transpose(inx, perm) + tf_ans = y.eval() + self.assertAllEqual(np_ans, tf_ans) + self.assertShapeEqual(np_ans, y) + self._ClearCachedSession() + def testNop(self): self._compareCpu(np.arange(0, 6).reshape([3, 2]).astype(np.float32), [0, 1]) diff --git a/tensorflow/python/ops/conv2d_benchmark.py b/tensorflow/python/ops/conv2d_benchmark.py new file mode 100644 index 0000000000..6992fa57ea --- /dev/null +++ b/tensorflow/python/ops/conv2d_benchmark.py @@ -0,0 +1,141 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Benchmark for Conv2D op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools +import time + +from tensorflow.python.client import session as session_lib +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def build_graph(device, input_shape, filter_shape, strides, padding, num_iters): + """builds a graph containing a sequence of conv2d operations. + + Args: + device: String, the device to run on. + input_shape: Shape of the input tensor. + filter_shape: Shape of the filter tensor. + strides: A list of ints. 1-D of length 4. The stride of sliding + window for each dimension of input. + padding: A string from: "SAME", "VALID". The type of padding + algorithm to use. + num_iters: number of iterations to run conv2d. + + Returns: + An array of tensors to run() + """ + with ops.device("/%s:0" % device): + inp = variables.Variable(random_ops.truncated_normal(input_shape)) + filt = variables.Variable(random_ops.truncated_normal(filter_shape)) + + outputs = [] + conv2d_op = nn_ops.conv2d(inp, filt, strides, padding, data_format="NHWC") + outputs.append(conv2d_op) + for _ in range(1, num_iters): + with ops.control_dependencies([conv2d_op]): + conv2d_op = nn_ops.conv2d( + inp, filt, strides, padding, data_format="NHWC") + outputs.append(conv2d_op) + return control_flow_ops.group(*outputs) + + +class Conv2DBenchmark(test.Benchmark): + """Benchmark conv2d!""" + + def _run_graph(self, device, input_shape, filter_shape, strides, padding, + num_iters): + """runs the graph and print its execution time. + + Args: + device: String, the device to run on. 
+ input_shape: Shape of the input tensor. + filter_shape: Shape of the filter tensor. + strides: A list of ints. 1-D of length 4. The stride of sliding + window for each dimension of input. + padding: A string from: "SAME", "VALID". The type of padding + algorithm to use. num_iters: Number of iterations to run the + benchmark. + num_iters: number of iterations to run conv2d. + + Returns: + The duration of the run in seconds. + """ + graph = ops.Graph() + with graph.as_default(): + outputs = build_graph(device, input_shape, filter_shape, strides, padding, + num_iters) + with session_lib.Session(graph=graph) as session: + variables.global_variables_initializer().run() + # warmup runs + session.run(outputs) + + start_time = time.time() + session.run(outputs) + duration = (time.time() - start_time) / num_iters + + print("%s inputshape:%s filtershape:%s strides:%s padding:%s " + "%d iters: %.8f sec" % + (device, str(input_shape).replace(" ", ""), + str(filter_shape).replace(" ", ""), + str(strides).replace(" ", ""), padding, num_iters, duration)) + + name_template = ( + "conv2d_{device}_input_shape_{inputshape}_filter_shape_{filtershape}_" + "strides_{strides}_padding_{padding}") + + self.report_benchmark( + name=name_template.format( + device=device, + inputshape=str(input_shape).replace(" ", ""), + filtershape=str(filter_shape).replace(" ", ""), + strides=str(strides).replace(" ", ""), + padding=padding).replace(" ", ""), + iters=num_iters, + wall_time=duration / num_iters) + + return duration + + def benchmark_conv2d(self): + print("conv2d benchmark:") + + h = 500 + w = 500 + fh = 3 + fw = 3 + input_shapes = [] + filter_shapes = [] + for b, c in itertools.product([4, 16, 32], [i for i in range(3, 16)]): + input_shapes += [[b, h, w, c]] + filter_shapes += [[fh, fw, c, b]] + strides = [[1, 2, 2, 1]] + paddings = ["VALID", "SAME"] + for ishape, fshape in zip(input_shapes, filter_shapes): + for stride in strides: + for padding in paddings: + self._run_graph("gpu", 
ishape, fshape, stride, padding, 80) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/ops/transpose_benchmark.py b/tensorflow/python/ops/transpose_benchmark.py index 63a314295e..6b5f0f20d8 100644 --- a/tensorflow/python/ops/transpose_benchmark.py +++ b/tensorflow/python/ops/transpose_benchmark.py @@ -1,4 +1,4 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ from tensorflow.python.platform import test def build_graph(device, input_shape, perm, datatype, num_iters): - """Build a graph containing a sequence of conv2d operations. + """builds a graph containing a sequence of conv2d operations. Args: device: String, the device to run on. @@ -50,10 +50,12 @@ def build_graph(device, input_shape, perm, datatype, num_iters): t = constant_op.constant(inp, shape=input_shape) outputs = [] - outputs.append(array_ops.transpose(t, perm)) - for i in range(1, num_iters): - with ops.control_dependencies([outputs[i - 1]]): - outputs.append(array_ops.transpose(t, perm)) + transpose_op = array_ops.transpose(t, perm) + outputs.append(transpose_op) + for _ in range(1, num_iters): + with ops.control_dependencies([transpose_op]): + transpose_op = array_ops.transpose(t, perm) + outputs.append(transpose_op) return control_flow_ops.group(*outputs) @@ -61,7 +63,7 @@ class TransposeBenchmark(test.Benchmark): """Benchmark transpose!""" def _run_graph(self, device, input_shape, perm, num_iters, datatype): - """Run the graph and print its execution time. + """runs the graph and print its execution time. Args: device: String, the device to run on. 
@@ -82,9 +84,11 @@ class TransposeBenchmark(test.Benchmark): session.run(outputs) start_time = time.time() session.run(outputs) + duration = (time.time() - start_time) / num_iters throughput = np.prod( np.array(input_shape)) * datatype().itemsize * 2 / duration / 1e9 + print("%s %s inputshape:%s perm:%s %d %.6fsec, %.4fGB/s." % (device, str(datatype), str(input_shape).replace(" ", ""), str(perm).replace(" ", ""), num_iters, duration, throughput)) @@ -108,12 +112,12 @@ class TransposeBenchmark(test.Benchmark): datatypes = [np.complex128, np.float64, np.float32, np.float16, np.int8] - small_shapes = [[2, 20, 20, 20, 16], [2, 16, 20, 20, 20]] * 2 + [[ - 2, 100, 100, 16 - ], [2, 16, 100, 100]] * 2 + [[2, 5000, 16], [2, 16, 5000]] * 2 - small_perms = [[0, 4, 1, 2, 3], [0, 2, 3, 4, 1]] + [[4, 1, 2, 3, 0]] * 2 + [ - [0, 3, 1, 2], [0, 2, 3, 1] - ] + [[3, 1, 2, 0]] * 2 + [[0, 2, 1]] * 2 + [[2, 1, 0]] * 2 + small_shapes = [[2, 20, 20, 20, 16], [2, 16, 20, 20, 20]] * 2 + small_shapes += [[2, 100, 100, 16], [2, 16, 100, 100]] * 2 + small_shapes += [[2, 5000, 16], [2, 16, 5000]] * 2 + small_perms = [[0, 4, 1, 2, 3], [0, 2, 3, 4, 1]] + [[4, 1, 2, 3, 0]] * 2 + small_perms += [[0, 3, 1, 2], [0, 2, 3, 1]] + [[3, 1, 2, 0]] * 2 + small_perms += [[0, 2, 1]] * 2 + [[2, 1, 0]] * 2 large_shapes = [[2, 40, 40, 40, 32], [2, 40, 40, 40, 64]] * 2 + [[ 2, 300, 300, 32 @@ -132,5 +136,23 @@ class TransposeBenchmark(test.Benchmark): for ishape, perm in zip(large_shapes, large_perms): self._run_graph("gpu", ishape, perm, num_iters, datatype) + small_dim_large_shapes = [[2, 10000, 3], [2, 3, 10000], [2, 10000, 8], + [2, 8, 10000]] + small_dim_small_shapes = [[2, 5000, 3], [2, 3, 5000], [2, 5000, 8], + [2, 8, 5000]] + small_dim_perms = [[0, 2, 1]] * 4 + + num_iters = 320 + small_dim_large_shape_datatypes = [np.float64, np.float32, np.int8] + for datatype in small_dim_large_shape_datatypes: + for ishape, perm in zip(small_dim_large_shapes, small_dim_perms): + self._run_graph("gpu", ishape, perm, 
num_iters, datatype) + + small_dim_small_shape_datatypes = [np.complex128, np.float16] + for datatype in small_dim_small_shape_datatypes: + for ishape, perm in zip(small_dim_small_shapes, small_dim_perms): + self._run_graph("gpu", ishape, perm, num_iters, datatype) + + if __name__ == "__main__": test.main() -- GitLab From 3acd57c2ffff6055b322ba08ba74fa1885fbba19 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 6 Oct 2017 09:37:33 -0700 Subject: [PATCH 107/909] Fuse TFE_NewOp and TFE_OpGetAttrType to avoid leaking memory. Removes TFE_NewOp and TFE_OpGetAttrType from pywrap_tensorflow, adds TFE_OpNameGetAttrType. PiperOrigin-RevId: 171302338 --- tensorflow/c/eager/c_api.cc | 14 ++++++++++++++ tensorflow/c/eager/c_api.h | 6 ++++++ tensorflow/python/eager/backprop.py | 4 ++-- tensorflow/python/pywrap_tfe.i | 3 +-- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 74f2e4f342..514a4010bc 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -273,6 +273,20 @@ TF_AttrType TFE_OpGetAttrType(TFE_Op* op, const char* attr_name, return ret; } +TF_AttrType TFE_OpNameGetAttrType(TFE_Context* ctx, + const char* op_or_function_name, + const char* attr_name, unsigned char* is_list, + TF_Status* status) { + TF_AttrType ret; + TFE_Op* op = TFE_NewOp(ctx, op_or_function_name, status); + if (!status->status.ok()) { + return TF_ATTR_INT; // Same dummy return as TFE_OpGetAttrType. 
+ } + ret = TFE_OpGetAttrType(op, attr_name, is_list, status); + TFE_DeleteOp(op); + return ret; +} + void TFE_OpSetAttrString(TFE_Op* op, const char* attr_name, const char* value) { op->attrs.Set(attr_name, value); } diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index a4f7d308fb..9bfa63711b 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -107,6 +107,12 @@ TF_CAPI_EXPORT extern void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_St TF_CAPI_EXPORT extern TF_AttrType TFE_OpGetAttrType(TFE_Op* op, const char* attr_name, unsigned char* is_list, TF_Status* status); +// Get an attribute type given an op name; a fusion of TFE_NewOp and +// TFE_OpGetAttrType for use from Python without the overhead of the individual +// calls and memory management of TFE_Op. +TF_CAPI_EXPORT extern TF_AttrType TFE_OpNameGetAttrType( + TFE_Context* ctx, const char* op_or_function_name, const char* attr_name, + unsigned char* is_list, TF_Status* status); TF_CAPI_EXPORT extern void TFE_OpSetAttrString(TFE_Op* op, const char* attr_name, const char* value); diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 3c84cbbd6f..cca8e47044 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -49,8 +49,8 @@ def op_attr_type(op_type, attr_name): except KeyError: with errors.raise_exception_on_not_ok_status() as status: h = context.context()._handle # pylint: disable=protected-access - op = pywrap_tensorflow.TFE_NewOp(h, op_type, status) - attr_type = pywrap_tensorflow.TFE_OpGetAttrType(op, attr_name, status) + attr_type = pywrap_tensorflow.TFE_OpNameGetAttrType( + h, op_type, attr_name, status) _op_attr_type_cache[(op_type, attr_name)] = attr_type return attr_type diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 128e46e6ce..d5b7294c82 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -19,8 +19,7 @@ 
limitations under the License. %rename("%s") TFE_DeleteContext; %rename("%s") TFE_ContextListDevices; %rename("%s") TFE_ContextAddFunctionDef; -%rename("%s") TFE_NewOp; -%rename("%s") TFE_OpGetAttrType; +%rename("%s") TFE_OpNameGetAttrType; %rename("%s") TFE_Py_InitEagerTensor; %rename("%s") TFE_Py_RegisterExceptionClass; %rename("%s") TFE_Py_Execute; -- GitLab From 8fcbef3428ce69de9cedafd0d4c0f141c79d418c Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 6 Oct 2017 09:46:44 -0700 Subject: [PATCH 108/909] [XLA:LLVM] Annotate tuple instructions with AA metadata. PiperOrigin-RevId: 171303412 --- tensorflow/compiler/xla/service/llvm_ir/ops.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/ops.cc b/tensorflow/compiler/xla/service/llvm_ir/ops.cc index ac562e231c..3965433494 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ops.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ops.cc @@ -73,12 +73,13 @@ void EmitTuple(IrArray tuple, tensorflow::gtl::ArraySlice operands, llvm::IRBuilder<>* ir_builder) { for (size_t i = 0; i < operands.size(); ++i) { - ir_builder->CreateStore( + auto* store = ir_builder->CreateStore( ir_builder->CreatePointerCast(operands[i], PrimitiveTypeToIrType(TUPLE, ir_builder)), ir_builder->CreateInBoundsGEP( tuple.GetBasePointer(), {ir_builder->getInt64(0), ir_builder->getInt64(i)})); + tuple.AnnotateLoadStoreInstructionWithMetadata(store); } } -- GitLab From a9104e7529eb75454aaaa2ea29b8ebe40ee7bbd0 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 6 Oct 2017 09:46:44 -0700 Subject: [PATCH 109/909] Add documentation to sloppy_interleave function PiperOrigin-RevId: 171303413 --- tensorflow/contrib/data/python/ops/sloppy_ops.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tensorflow/contrib/data/python/ops/sloppy_ops.py b/tensorflow/contrib/data/python/ops/sloppy_ops.py index 058c497320..4f3da4320c 100644 --- a/tensorflow/contrib/data/python/ops/sloppy_ops.py 
+++ b/tensorflow/contrib/data/python/ops/sloppy_ops.py @@ -102,6 +102,17 @@ def sloppy_interleave(map_func, cycle_length, block_length=1): strictly obeys), producing an element from a different underlying dataset instead. + Example usage: + + ```python + # Preprocess 4 files concurrently. + filenames = tf.data.Dataset.list_files("/path/to/data/train*.tfrecords") + dataset = filenames.apply( + tf.contrib.data.sloppy_interleave( + lambda filename: tf.data.TFRecordDataset(filename), + cycle_length=4)) + ``` + WARNING: The order of elements in the resulting dataset is not deterministic. Use `Dataset.interleave()` if you want the elements to have a deterministic order. -- GitLab From 420d166e7f79d37d1be66d648dd99131068a8537 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Fri, 6 Oct 2017 09:51:05 -0700 Subject: [PATCH 110/909] Use a serialized graph compiler to generate xla graph. - Move away from previous TF graph executor, which contains few features that we need and also introduces indeterminism. - Unlike previous executor, the new serial graph compiler doesn't recurse into a function and inlines it. Instead, it creates a computation of the function and then creates a `call` op to call into the newly created computation. - Add an optional comparator in DFS algorithm, which is needed to make the compiler deterministic. RELNOTES: Use a deterministic executor to generate xla graph.
PiperOrigin-RevId: 171303938 --- tensorflow/compiler/tf2xla/BUILD | 2 + tensorflow/compiler/tf2xla/graph_compiler.cc | 185 ++++++++++++++++++ tensorflow/compiler/tf2xla/graph_compiler.h | 103 ++++++++++ tensorflow/compiler/tf2xla/xla_compiler.cc | 98 +++++----- tensorflow/compiler/tf2xla/xla_compiler.h | 2 +- .../compiler/tf2xla/xla_compiler_test.cc | 69 ++++++- tensorflow/compiler/xla/service/service.cc | 5 +- tensorflow/core/graph/algorithm.cc | 64 ++++-- tensorflow/core/graph/algorithm.h | 43 +++- tensorflow/core/graph/algorithm_test.cc | 35 ++++ tensorflow/core/graph/graph.h | 4 +- 11 files changed, 530 insertions(+), 80 deletions(-) create mode 100644 tensorflow/compiler/tf2xla/graph_compiler.cc create mode 100644 tensorflow/compiler/tf2xla/graph_compiler.h diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 4da2ed722e..647bfd1849 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -102,11 +102,13 @@ cc_library( "xla_helpers.cc", "xla_op_kernel.cc", "xla_op_registry.cc", + "graph_compiler.cc", "xla_cpu_backend.cc", ] + if_cuda_is_configured([ "xla_gpu_backend.cc", ]), hdrs = [ + "graph_compiler.h", "xla_compilation_device.h", "xla_compiler.h", "xla_context.h", diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc new file mode 100644 index 0000000000..c168266b16 --- /dev/null +++ b/tensorflow/compiler/tf2xla/graph_compiler.cc @@ -0,0 +1,185 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/graph_compiler.h" + +#include +#include +#include + +#include "tensorflow/compiler/tf2xla/dump_graph.h" +#include "tensorflow/compiler/tf2xla/functionalize_control_flow.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/type_util.h" +#include "tensorflow/compiler/tf2xla/xla_compilation_device.h" +#include "tensorflow/compiler/tf2xla/xla_context.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/executor.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/graph_optimizer.h" +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { + +Status GraphCompiler::Compile() { + std::vector bindings(graph_->num_node_ids()); + std::vector topo_sorted_nodes; + // XLA requires determinism, generate a stable ordering from DFS. + GetReversePostOrder(*graph_, &topo_sorted_nodes, + /*stable_comparator=*/NodeComparatorID()); + + OpKernelContext::Params params; + PartiallySetupParams(&params); + + for (Node* n : topo_sorted_nodes) { + // Set up bindings.
+ NodeBinding& binding = bindings[n->id()]; + binding.node = n; + Status s = flib_->CreateKernel(n->def(), &binding.op_kernel); + binding.output_attrs.resize(n->num_outputs()); + if (!s.ok()) { + binding.op_kernel = nullptr; + s = AttachDef(s, *n); + LOG(ERROR) << "Executor failed to create kernel. " << s; + return s; + } + } + + // Bindings are initialized by the size of graph_->num_node_ids. However, the + // graph may contain dead nodes that still hold a valid node id. Thus + // graph_->num_node_ids could be larger than number of topo sorted nodes. + TF_RET_CHECK(bindings.size() >= topo_sorted_nodes.size()); + + for (Node* n : topo_sorted_nodes) { + TF_RET_CHECK(!n->IsRecv() && !n->IsSend() && !n->IsSwitch()) + << "Not supported node: " << n->DebugString(); + NodeBinding& binding = bindings[n->id()]; + params.op_kernel = binding.op_kernel; + params.output_attr_array = binding.output_attrs.data(); + + // tensor_inputs_ is a buffer reused across graph traversal. We clean up and + // reinitialize the buffer before we visit a new node. + tensor_inputs_.clear(); + tensor_inputs_.resize(n->num_inputs()); + + // Set up inputs from outputs of previous nodes. + for (auto* e : n->in_edges()) { + if (e->IsControlEdge()) continue; + Node* src = e->src(); + tensor_inputs_[e->dst_input()] = + bindings[src->id()].tensor_values[e->src_output()]; + } + + OpKernelContext op_context(&params, n->num_outputs()); + if (IsFunctional(n)) { + TF_RETURN_IF_ERROR(CompileFunctionalNode(n, &op_context)); + } else { + device_->Compute(CHECK_NOTNULL(params.op_kernel), &op_context); + Status s = op_context.status(); + TF_RETURN_IF_ERROR(s); + } + + // Set up outputs. Also check if outputs from the previous computation are + // valid.
+ for (int o = 0; o < n->num_outputs(); ++o) { + const auto tensor_val = op_context.release_output(o); + if (*op_context.is_output_dead() || tensor_val.tensor == nullptr) { + return errors::Internal("Missing xla_context ", o, "-th output from ", + (*op_context.is_output_dead() ? "(dead)" : ""), + SummarizeNode(*n)); + } + binding.tensor_values.push_back(tensor_val); + } + } + + // Clean up tensor data and op kernels. + for (NodeBinding& binding : bindings) { + delete binding.op_kernel; + for (auto& t : binding.tensor_values) { + if (!t.is_ref()) { + delete t.tensor; + } + } + } + return Status::OK(); +} + +bool GraphCompiler::IsFunctional(Node* n) { + return n->type_string() == FunctionLibraryDefinition::kGradientOp || + (flib_->GetFunctionLibraryDefinition()->Find(n->def().op()) != + nullptr); +} + +Status GraphCompiler::CompileFunctionalNode(Node* n, + OpKernelContext* op_context) { + TF_RET_CHECK(IsFunctional(n)); + // For functional nodes, compile them using compiler_ and call into the + // functions. + XlaOpKernelContext xla_op_context(op_context); + + std::vector arguments; + XlaCompiler::CompilationResult result; + NameAttrList func; + if (flib_->GetFunctionLibraryDefinition()->Find(n->def().op())) { + func.set_name(n->def().op()); + } else { + func.set_name(FunctionLibraryDefinition::kGradientOp); + } + *func.mutable_attr() = n->def().attr(); + + // Compile the graph using the function compiler. + TF_ASSIGN_OR_RETURN(auto computation, compiler_(func, &xla_op_context)); + XlaContext& context = XlaContext::Get(op_context); + auto* b = context.builder(); + + // Graph data handles from the inputs. + std::vector handles; + for (auto tensor : tensor_inputs_) { + auto expression = + reinterpret_cast(tensor->tensor_data().data()); + // TODO(yunxing): Support two rare cases below where input is a resource or + // contains a null handle. 
+ TF_RET_CHECK(expression->resource() == nullptr) + << "Input with resource is not supported."; + TF_RET_CHECK(expression->handle().handle() != 0) + << "Invalid computation handle."; + handles.push_back(expression->handle()); + } + auto output_handle = b->Call(*computation, handles); + // The output handle of `Call` computation is a tuple type. Unzip it so + // that it can fit into future computations. + for (int64 idx = 0; idx < n->num_outputs(); ++idx) { + xla_op_context.SetOutput(idx, b->GetTupleElement(output_handle, idx)); + } + return b->first_error(); +} + +void GraphCompiler::PartiallySetupParams(OpKernelContext::Params* params) { + params->device = device_; + params->inputs = &tensor_inputs_; + params->step_container = step_container_; + params->resource_manager = device_->resource_manager(); +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/graph_compiler.h b/tensorflow/compiler/tf2xla/graph_compiler.h new file mode 100644 index 0000000000..6fc0b18dcd --- /dev/null +++ b/tensorflow/compiler/tf2xla/graph_compiler.h @@ -0,0 +1,103 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_GRAPH_COMPILER_H_ +#define TENSORFLOW_COMPILER_TF2XLA_GRAPH_COMPILER_H_ + +#include "tensorflow/compiler/tf2xla/xla_compilation_device.h" +#include "tensorflow/compiler/tf2xla/xla_context.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/notification.h" +#include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { + +using FunctionCompiler = + std::function>( + const NameAttrList& function, XlaOpKernelContext* xla_op_context)>; + +// GraphCompiler compiles the graph in topological order in the current +// thread. It also resolves the nondeterminism in the graph by enforcing a total +// order on all inputs to a node. This abstraction helps us create the same XLA +// computation given two structurally equivalent TensorFlow graphs. If a +// function call is visited during the graph traversal, it is then compiled +// through the FunctionCompiler into a computation and a `Call` operation is +// inserted to call into that computation. +class GraphCompiler { + public: + GraphCompiler(XlaContext* xla_context, XlaCompilationDevice* device, + Graph* graph, FunctionLibraryRuntime* flib, + ScopedStepContainer* step_container, + const FunctionCompiler& compiler) + : xla_context_(xla_context), + device_(device), + graph_(graph), + flib_(flib), + step_container_(step_container), + compiler_(compiler) {} + + // Compiles the graph. 
The results are written in `xla_context` that is passed + // into the compiler. + Status Compile(); + + private: + // NodeBinding is a wrapper on a `Node` that also contains computed + // TensorValue. + struct NodeBinding { + const Node* node; + // Kernel for this node, to be filled by CreateKernel. + OpKernel* op_kernel; + // Output values of this node. + std::vector tensor_values; + // Attributes of the outputs. + gtl::InlinedVector output_attrs; + }; + + // Partially sets params. The partially set params can be reused + // across multiple node visits. + void PartiallySetupParams(OpKernelContext::Params* params); + + // Tests if a node is a functional node. A functional node represents a + // defined computation and should be compiled using `compiler_`. + bool IsFunctional(Node*); + + // Compiles a functional node and writes result to OpKernelContext. A + // functional node represents a defined computation and should be compiled + // using `compiler_`. + Status CompileFunctionalNode(Node*, OpKernelContext*); + + XlaContext* xla_context_; + XlaCompilationDevice* device_; + Graph* graph_; + FunctionLibraryRuntime* flib_; + ScopedStepContainer* step_container_; + FunctionCompiler compiler_; + // A buffer to hold tensor inputs to a node, this is reused across the graph + // traversal. + gtl::InlinedVector tensor_inputs_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_GRAPH_COMPILER_H_ diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 8521d4167a..9e405578aa 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -20,10 +20,12 @@ limitations under the License.
#include "tensorflow/compiler/tf2xla/dump_graph.h" #include "tensorflow/compiler/tf2xla/functionalize_control_flow.h" +#include "tensorflow/compiler/tf2xla/graph_compiler.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_compilation_device.h" #include "tensorflow/compiler/tf2xla/xla_context.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/xla/client/client_library.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/executor.h" @@ -178,9 +180,34 @@ Status XlaCompiler::CompileFunction( namespace { -Status ExecuteGraph(XlaContext* xla_context, std::unique_ptr graph, - XlaCompilationDevice* device, FunctionLibraryRuntime* flib, - int64 step_id) { +// Builds XlaCompiler argument descriptions `args` from `ctx`. +Status MakeXlaCompilerArgumentsFromInputs( + XlaOpKernelContext* ctx, std::vector* args) { + VLOG(2) << "Num inputs " << ctx->num_inputs(); + args->resize(ctx->num_inputs()); + for (int i = 0; i < ctx->num_inputs(); ++i) { + VLOG(2) << " Input " << i + << " type: " << DataTypeString(ctx->input_type(i)) + << " shape: " << ctx->InputShape(i).DebugString(); + XlaCompiler::Argument& arg = (*args)[i]; + DataType type = ctx->input_type(i); + + if (type == DT_RESOURCE) { + return errors::InvalidArgument( + "Resource as function argument is not yet implemented."); + } else { + arg.kind = XlaCompiler::Argument::kParameter; + arg.type = ctx->input_type(i); + TF_RETURN_IF_ERROR( + TensorShapeToXLAShape(arg.type, ctx->InputShape(i), &arg.shape)); + } + } + return Status::OK(); +} + +Status ExecuteGraph(XlaCompiler* compiler, XlaContext* xla_context, + std::unique_ptr graph, XlaCompilationDevice* device, + FunctionLibraryRuntime* flib, int64 step_id) { // Resource cleanup is a bit messy. XlaContext is a ref-counted resource; the // resource manager takes ownership via Create, and unrefs via Cleanup. 
We // explicitly add a reference to ensure the refcount at entry is maintained at @@ -197,56 +224,27 @@ Status ExecuteGraph(XlaContext* xla_context, std::unique_ptr graph, TF_RETURN_IF_ERROR(device->resource_manager()->Create( step_container->name(), XlaContext::kXlaContextResourceName, xla_context)); - - // Create a LocalExecutor that will own and run the graph. - // TODO(b/66947550): migrate away from using an Executor in order to guarantee - // determinism and thread-safety. - LocalExecutorParams exec_params; - exec_params.device = device; - exec_params.function_library = flib; - exec_params.create_kernel = [flib](const NodeDef& ndef, OpKernel** kernel) { - return flib->CreateKernel(ndef, kernel); - }; - exec_params.delete_kernel = [](OpKernel* kernel) { delete kernel; }; - Executor* exec_ptr = nullptr; - TF_RETURN_IF_ERROR(NewLocalExecutor(exec_params, graph.release(), &exec_ptr)); - std::unique_ptr exec(exec_ptr); - // At this point ownership of the graph has been transferred to exec. - - // Run the graph symbolically, turning the graph into an XLA computation. - Executor::Args exec_args; - exec_args.step_id = step_id; - exec_args.step_container = step_container.get(); - - // Pushes closures to run onto `worklist`. We don't run the closures directly - // from 'runner' since that might lead to a stack overflow for large graphs. - std::deque worklist; - exec_args.runner = [&](Executor::Args::Closure c) { - worklist.push_back(std::move(c)); + // Compile_func is used to tell the serial executor how to compile a function. 
+ auto compile_func = [&](const NameAttrList& function, + XlaOpKernelContext* xla_op_context) + -> xla::StatusOr> { + std::vector arguments; + + TF_RETURN_IF_ERROR( + MakeXlaCompilerArgumentsFromInputs(xla_op_context, &arguments)); + + XlaCompiler::CompilationResult result; + TF_RETURN_IF_ERROR(compiler->CompileFunction(XlaCompiler::CompileOptions(), + function, arguments, &result)); + return result.computation; }; - // The following code assumes there is only one thread involved and no - // concurrency, because we did not provide Executor a threaded runner. Async - // ops on the XlaCompilation device must not use threads or concurrency - // internally. - bool done = false; - exec->RunAsync(exec_args, [&](const Status& s) { - status = s; - done = true; - }); - // Repeatedly run closures from the worklist until `done` is signalled. - while (!done) { - TF_RET_CHECK(!worklist.empty()); - Executor::Args::Closure& c = worklist.front(); - c(); - worklist.pop_front(); - } - TF_RETURN_WITH_CONTEXT_IF_ERROR( - status, "Conversion from TensorFlow graph to XLA computation failed."); - + GraphCompiler graph_compiler(xla_context, device, graph.get(), flib, + step_container.get(), compile_func); + TF_RETURN_IF_ERROR(graph_compiler.Compile()); // Explicitly clean up the step container, to capture the cleanup status. step_container.reset(); - return status; + return Status::OK(); } // Builds XLA computations for each of the arguments to the computation. 
@@ -494,7 +492,7 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, &result->input_mapping, &result->xla_input_shapes)); context->set_args(std::move(arg_expressions)); - TF_RETURN_IF_ERROR(ExecuteGraph(context, std::move(graph), device_, + TF_RETURN_IF_ERROR(ExecuteGraph(this, context, std::move(graph), device_, flib_runtime_, NextStepId())); int num_nonconst_outputs; diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index 35159dbad4..0435c619f8 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -240,7 +240,7 @@ class XlaCompiler { bool use_tuple_arg = false; // If 'return_updated_values_for_all_resources' is true, then updated - // values of all resource resources arguments will be included in the + // values of all resource arguments will be included in the // 'resource_updates' of the computation, even if the resource was not // modified by the computation. Used when compiling loop bodies to ensure // the input and output signatures match. 
diff --git a/tensorflow/compiler/tf2xla/xla_compiler_test.cc b/tensorflow/compiler/tf2xla/xla_compiler_test.cc index 531725a623..88ed3b89a6 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler_test.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler_test.cc @@ -63,6 +63,7 @@ class DummyReadResourceOp : public XlaOpKernel { dummy->Unref(); ctx->SetOutput(0, ctx->Input(0)); + ctx->SetOutput(1, ctx->Input(0)); } }; @@ -80,22 +81,25 @@ class DummyReadResourceCC { if (!scope.ok()) return; scope.UpdateStatus(scope.DoShapeInference(ret)); if (!scope.ok()) return; - this->output_ = Output(ret, 0); + this->output1_ = Output(ret, 0); + this->output2_ = Output(ret, 1); } - Node* node() const { return output_.node(); } - Output output_; + Output output1_; + Output output2_; }; REGISTER_OP("DummyReadResource") .Input("input: int32") - .Output("output: int32") + .Output("output1: int32") + .Output("output2: int32") .SetShapeFn(shape_inference::UnknownShape) .Doc(R"doc( A dummy Op. input: dummy input. -output: dummy output. +output1: dummy output. +output2: dummy output. )doc"); REGISTER_XLA_OP(Name("DummyReadResource"), DummyReadResourceOp); @@ -316,7 +320,8 @@ TEST_F(XlaCompilerTest, ResourceManager) { Scope scope = Scope::NewRootScope().ExitOnError(); auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0); auto b = DummyReadResourceCC(scope.WithOpName("B"), a); - auto c = ops::_Retval(scope.WithOpName("C"), b.output_, 0); + auto c = ops::Add(scope.WithOpName("C"), b.output2_, b.output1_); + auto d = ops::_Retval(scope.WithOpName("D"), c, 0); std::unique_ptr graph(new Graph(OpRegistry::Global())); TF_ASSERT_OK(scope.ToGraph(graph.get())); @@ -349,6 +354,58 @@ TEST_F(XlaCompilerTest, ResourceManager) { resource->Unref(); } +// Tests compilation and execution of a graph that adds two tensors. +TEST_F(XlaCompilerTest, DeterministicCompilation) { + // Builds a graph that contains a node with two output edges. The compiler + // should always traverse them in the same order. 
+ const int64 test_count = 2; + + std::vector results(test_count); + + for (int64 i = 0; i < test_count; ++i) { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0); + auto b = ops::Neg(scope.WithOpName("B"), a); + auto c = ops::Neg(scope.WithOpName("C"), a); + auto d = ops::Add(scope.WithOpName("D"), b, c); + auto e = ops::_Retval(scope.WithOpName("E"), d, 0); + std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + + // Builds a description of the argument. + std::vector args(1); + args[0].kind = XlaCompiler::Argument::kParameter; + args[0].type = DT_INT32; + args[0].shape = xla::ShapeUtil::MakeShape(xla::S32, {2}); + + // Compiles the graph. + auto options = DefaultOptions(); + XlaCompiler compiler(options); + + TF_ASSERT_OK(compiler.CompileGraph(XlaCompiler::CompileOptions(), "dummy", + std::move(graph), args, &results[i])); + } + + for (int64 i = 1; i < test_count; ++i) { + auto m1 = + results[i - 1].computation->Snapshot().ValueOrDie()->entry().requests(); + auto m2 = + results[i].computation->Snapshot().ValueOrDie()->entry().requests(); + // Check if every entry is the same. + for (auto& entry1 : m1) { + int64 key = entry1.first; + auto value1 = entry1.second; + auto entry2 = m2.find(key); + auto value2 = entry2->second; + EXPECT_TRUE(entry2 != m2.end()); + string str1, str2; + value1.AppendToString(&str1); + value2.AppendToString(&str2); + EXPECT_EQ(str1, str2); + } + } +} + // Tests a computation that receives a TensorArray resource as input and // updates it. 
TEST_F(XlaCompilerTest, CanPassTensorArraysToAndFromComputation) { diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index bd7898a41f..d279e1f50f 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -187,8 +187,9 @@ tensorflow::Status Service::Computation(const ComputationRequest* arg, *result->mutable_computation() = computation_tracker_.NewComputation(arg->name()); - VLOG(1) << Printf("Created new computation %s on service %p", - result->computation().ShortDebugString().c_str(), this); + VLOG(1) << Printf("Created new computation %s on service %p, name %s", + result->computation().ShortDebugString().c_str(), this, + arg->name().c_str()); return tensorflow::Status::OK(); } diff --git a/tensorflow/core/graph/algorithm.cc b/tensorflow/core/graph/algorithm.cc index 3bfba3fc4e..6ef51aa7df 100644 --- a/tensorflow/core/graph/algorithm.cc +++ b/tensorflow/core/graph/algorithm.cc @@ -24,7 +24,8 @@ limitations under the License. namespace tensorflow { void DFS(const Graph& g, const std::function& enter, - const std::function& leave) { + const std::function& leave, + const NodeComparator& stable_comparator) { // Stack of work to do. struct Work { Node* node; @@ -51,24 +52,41 @@ void DFS(const Graph& g, const std::function& enter, // Arrange to call leave(n) when all done with descendants. if (leave) stack.push_back(Work{n, true}); - // Arrange to work on descendants. - for (Node* out : n->out_nodes()) { + gtl::iterator_range nodes = n->out_nodes(); + auto add_work = [&visited, &stack](Node* out) { if (!visited[out->id()]) { // Note; we must not mark as visited until we actually process it. 
stack.push_back(Work{out, false}); } + }; + + if (stable_comparator) { + std::vector nodes_sorted; + for (Node* out : nodes) { + nodes_sorted.emplace_back(out); + } + std::sort(nodes_sorted.begin(), nodes_sorted.end(), stable_comparator); + for (Node* out : nodes_sorted) { + add_work(out); + } + } else { + for (Node* out : nodes) { + add_work(out); + } } } } void ReverseDFS(const Graph& g, const std::function& enter, - const std::function& leave) { - ReverseDFSFrom(g, {g.sink_node()}, enter, leave); + const std::function& leave, + const NodeComparator& stable_comparator) { + ReverseDFSFrom(g, {g.sink_node()}, enter, leave, stable_comparator); } void ReverseDFSFrom(const Graph& g, gtl::ArraySlice start, const std::function& enter, - const std::function& leave) { + const std::function& leave, + const NodeComparator& stable_comparator) { // Stack of work to do. struct Work { Node* node; @@ -97,23 +115,41 @@ void ReverseDFSFrom(const Graph& g, gtl::ArraySlice start, // Arrange to call leave(n) when all done with descendants. if (leave) stack.push_back(Work{n, true}); - // Arrange to work on parents. - for (Node* in : n->in_nodes()) { - if (!visited[in->id()]) { + gtl::iterator_range nodes = n->in_nodes(); + + auto add_work = [&visited, &stack](Node* out) { + if (!visited[out->id()]) { // Note; we must not mark as visited until we actually process it. 
- stack.push_back(Work{in, false}); + stack.push_back(Work{out, false}); + } + }; + + if (stable_comparator) { + std::vector nodes_sorted; + for (Node* in : nodes) { + nodes_sorted.emplace_back(in); + } + std::sort(nodes_sorted.begin(), nodes_sorted.end(), stable_comparator); + for (Node* in : nodes_sorted) { + add_work(in); + } + } else { + for (Node* in : nodes) { + add_work(in); } } } } -void GetPostOrder(const Graph& g, std::vector* order) { +void GetPostOrder(const Graph& g, std::vector* order, + const NodeComparator& stable_comparator) { order->clear(); - DFS(g, nullptr, [order](Node* n) { order->push_back(n); }); + DFS(g, nullptr, [order](Node* n) { order->push_back(n); }, stable_comparator); } -void GetReversePostOrder(const Graph& g, std::vector* order) { - GetPostOrder(g, order); +void GetReversePostOrder(const Graph& g, std::vector* order, + const NodeComparator& stable_comparator) { + GetPostOrder(g, order, stable_comparator); std::reverse(order->begin(), order->end()); } diff --git a/tensorflow/core/graph/algorithm.h b/tensorflow/core/graph/algorithm.h index 01d36e0a12..5bb6041d98 100644 --- a/tensorflow/core/graph/algorithm.h +++ b/tensorflow/core/graph/algorithm.h @@ -25,24 +25,50 @@ limitations under the License. namespace tensorflow { +// Comparator for two nodes. This is used in order to get a stable ording. +using NodeComparator = std::function; + +// Compares two node based on their ids. +struct NodeComparatorID { + bool operator()(const Node* n1, const Node* n2) const { + return n1->id() < n2->id(); + } +}; + +// Compare two nodes based on their names. +struct NodeComparatorName { + bool operator()(const Node* n1, const Node* n2) const { + return n1->name() < n2->name(); + } +}; + // Perform a depth-first-search on g starting at the source node. // If enter is not empty, calls enter(n) before visiting any children of n. // If leave is not empty, calls leave(n) after visiting all children of n. 
+// If stable_comparator is set, a stable ordering of visit is achieved by +// sorting a node's neighbors first before visiting them. extern void DFS(const Graph& g, const std::function& enter, - const std::function& leave); + const std::function& leave, + const NodeComparator& stable_comparator = {}); // Perform a reverse depth-first-search on g starting at the sink node. // If enter is not empty, calls enter(n) before visiting any parents of n. // If leave is not empty, calls leave(n) after visiting all parents of n. +// If stable_comparator is set, a stable ordering of visit is achieved by +// sorting a node's neighbors first before visiting them. extern void ReverseDFS(const Graph& g, const std::function& enter, - const std::function& leave); + const std::function& leave, + const NodeComparator& stable_comparator = {}); // Perform a reverse depth-first-search on g starting at the 'start' nodes. // If enter is not empty, calls enter(n) before visiting any parents of n. // If leave is not empty, calls leave(n) after visiting all parents of n. +// If stable_comparator is set, a stable ordering of visit is achieved by +// sorting a node's neighbors first before visiting them. extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice start, const std::function& enter, - const std::function& leave); + const std::function& leave, + const NodeComparator& stable_comparator = {}); // Stores in *order the post-order numbering of all nodes // in graph found via a depth first search starting at the source node. @@ -50,11 +76,18 @@ extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice start, // Note that this is equivalent to reverse topological sorting when the // graph does not have cycles. // +// If stable_comparator is set, a stable ordering of visit is achieved by +// sorting a node's neighbors first before visiting them. +// // REQUIRES: order is not NULL. 
-void GetPostOrder(const Graph& g, std::vector* order); +void GetPostOrder(const Graph& g, std::vector* order, + const NodeComparator& stable_comparator = {}); // Stores in *order the reverse post-order numbering of all nodes -void GetReversePostOrder(const Graph& g, std::vector* order); +// If stable_comparator is set, a stable ordering of visit is achieved by +// sorting a node's neighbors first before visiting them. +void GetReversePostOrder(const Graph& g, std::vector* order, + const NodeComparator& stable_comparator = {}); // Prune nodes in "g" that are not in some path from the source node // to any node in 'nodes'. Returns true if changes were made to the graph. diff --git a/tensorflow/core/graph/algorithm_test.cc b/tensorflow/core/graph/algorithm_test.cc index a529760426..0cdcdb6685 100644 --- a/tensorflow/core/graph/algorithm_test.cc +++ b/tensorflow/core/graph/algorithm_test.cc @@ -112,5 +112,40 @@ TEST(AlgorithmTest, ReversePostOrder) { EXPECT_FALSE(ExpectBefore(orders, order, &error)); } +TEST(AlgorithmTest, ReversePostOrderStable) { + int64 run_count = 100; + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + for (int64 i = 0; i < run_count; ++i) { + // One source of nondeterminism comes from unordered set with key of a + // pointer type, for example the order of FlatSet depends on the + // raw pointer value of Node. Stable post order suppose to remove this + // nondeterminism by enforcing an ordering based on node ids. 
+ GraphDefBuilder b(GraphDefBuilder::kFailImmediately); + string error; + Node* w1 = SourceOp("TestParams", b.opts().WithName("W1")); + Node* input = + SourceOp("TestInput", b.opts().WithName("input").WithControlInput(w1)); + BinaryOp("TestMul", w1, {input, 1}, b.opts().WithName("t2")); + // Insert different number of nodes between the allocation of t2 and t3, + // this creates enough entropy in the memory distance between t2 and t3 thus + // forces them to have randomized ordering had stable DFS was not + // implemented correctly. + for (int64 j = 0; j < i; ++j) { + BinaryOp("TestMul", w1, {input, 1}, + b.opts().WithName(strings::StrCat("internal", j))); + } + + BinaryOp("TestMul", w1, {input, 1}, b.opts().WithName("t3")); + + Graph g(OpRegistry::Global()); + TF_ASSERT_OK(b.ToGraph(&g)); + std::vector order; + + // Test reverse post order generates expected ordering. + GetReversePostOrder(g, &order, /*stable_comparator=*/NodeComparatorID()); + EXPECT_TRUE(ExpectBefore({{"t3", "t2"}}, order, &error)); + } +} } // namespace } // namespace tensorflow diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 5a31a6216b..54076ed1ab 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -298,12 +298,12 @@ class Edge { Node* dst() const { return dst_; } int id() const { return id_; } - // Return the number of the source output that produces the data + // Return the index of the source output that produces the data // carried by this edge. The special value kControlSlot is used // for control dependencies. int src_output() const { return src_output_; } - // Return the number of the destination input that consumes the data + // Return the index of the destination input that consumes the data // carried by this edge. The special value kControlSlot is used // for control dependencies. 
int dst_input() const { return dst_input_; } -- GitLab From bb6c863c10f0e9702fc29380f2ed598624897b18 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 6 Oct 2017 10:01:43 -0700 Subject: [PATCH 111/909] Deprecate op_dict argument to import_graph_def This semantics of this argument are unclear and don't seem usable (it can effectively only be used to limit the available ops to be imported). PiperOrigin-RevId: 171305211 --- tensorflow/python/framework/importer.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index eec7c4a463..c0d221ddfe 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import op_def_registry from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated_args # TODO(josh11b): SWIG the code from node_def_util instead of duplicating @@ -153,6 +154,10 @@ def _FindAttrInOpDef(attr_name, op_def): return None +@deprecated_args(None, 'Please file an issue at ' + 'https://github.com/tensorflow/tensorflow/issues if you depend' + ' on this feature.', + 'op_dict') def import_graph_def(graph_def, input_map=None, return_elements=None, name=None, op_dict=None, producer_op_list=None): """Imports the graph from `graph_def` into the current default `Graph`. @@ -177,15 +182,12 @@ def import_graph_def(graph_def, input_map=None, return_elements=None, name: (Optional.) A prefix that will be prepended to the names in `graph_def`. Note that this does not apply to imported function names. Defaults to `"import"`. - op_dict: (Optional.) A dictionary mapping op type names to `OpDef` protos. - Must contain an `OpDef` proto for each op type named in `graph_def`. 
- If omitted, uses the `OpDef` protos registered in the global registry. + op_dict: (Optional.) Deprecated, do not use. producer_op_list: (Optional.) An `OpList` proto with the (possibly stripped) - list of `OpDef`s used by the producer of the graph. If provided, attrs - for ops in `graph_def` that are not in `op_dict` that have their default - value according to `producer_op_list` will be removed. This will allow - some more `GraphDef`s produced by later binaries to be accepted by - earlier binaries. + list of `OpDef`s used by the producer of the graph. If provided, + unrecognized attrs for ops in `graph_def` that have their default value + according to `producer_op_list` will be removed. This will allow some more + `GraphDef`s produced by later binaries to be accepted by earlier binaries. Returns: A list of `Operation` and/or `Tensor` objects from the imported graph, @@ -229,8 +231,7 @@ def import_graph_def(graph_def, input_map=None, return_elements=None, name_to_op = {} - if op_dict is None: - op_dict = op_def_registry.get_registered_ops() + op_dict = op_def_registry.get_registered_ops() if producer_op_list is None: producer_op_dict = None -- GitLab From 251a1e70dc04b10fb25e8013d1ad1f27d5eda30b Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 6 Oct 2017 10:27:49 -0700 Subject: [PATCH 112/909] Add an actionable error message for build_info ImportError (#13528) This `import` statement is now the first point where we attempt to import a generated file, and hence could see a failure if the user tries to `import tensorflow` from the root of the git repository source tree. When this `import` fails, raise a more actionable error message. Fixes #13526. 
--- tensorflow/python/platform/self_check.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/platform/self_check.py b/tensorflow/python/platform/self_check.py index 39d38d7bbc..966a094e55 100644 --- a/tensorflow/python/platform/self_check.py +++ b/tensorflow/python/platform/self_check.py @@ -21,7 +21,13 @@ from __future__ import print_function import os -from tensorflow.python.platform import build_info +try: + from tensorflow.python.platform import build_info +except ImportError: + raise ImportError("Could not import tensorflow. Do not import tensorflow " + "from its source directory; change directory to outside " + "the TensorFlow source tree, and relaunch your Python " + "interpreter from there.") def preload_check(): -- GitLab From 08ea64c5a6748b66b310e73bb4591d091c227a33 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 6 Oct 2017 10:40:52 -0700 Subject: [PATCH 113/909] [XLA:CPU] Give parameter loads a meaningful LLVM name. The typed parameter loads often get lost after optimization, but the untyped loads tend to stick around. Giving them a name helps with readability of the IR. PiperOrigin-RevId: 171310991 --- tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 8b777bcf84..4375f13a0e 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1457,6 +1457,7 @@ Status IrEmitter::HandleParameter(HloInstruction* parameter) { llvm_ir::EmitBufferIndexingGEP(params, param_number, &ir_builder_); llvm::LoadInst* param_address_untyped = ir_builder_.CreateLoad(param_address_offset); + param_address_untyped->setName(AsStringRef(IrName(parameter, "untyped"))); if (hlo_module_config_.debug_options() .xla_llvm_enable_invariant_load_metadata()) { // We never reassign parameters, so this load is invariant. 
-- GitLab From 368754d8a6f4be1772b4bec9dbef686570637c5d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 10:53:51 -0700 Subject: [PATCH 114/909] Automated g4 rollback of changelist 171303938 PiperOrigin-RevId: 171313020 --- tensorflow/compiler/tf2xla/BUILD | 2 - tensorflow/compiler/tf2xla/graph_compiler.cc | 185 ------------------ tensorflow/compiler/tf2xla/graph_compiler.h | 103 ---------- tensorflow/compiler/tf2xla/xla_compiler.cc | 98 +++++----- tensorflow/compiler/tf2xla/xla_compiler.h | 2 +- .../compiler/tf2xla/xla_compiler_test.cc | 69 +------ tensorflow/compiler/xla/service/service.cc | 5 +- tensorflow/core/graph/algorithm.cc | 64 ++---- tensorflow/core/graph/algorithm.h | 43 +--- tensorflow/core/graph/algorithm_test.cc | 35 ---- tensorflow/core/graph/graph.h | 4 +- 11 files changed, 80 insertions(+), 530 deletions(-) delete mode 100644 tensorflow/compiler/tf2xla/graph_compiler.cc delete mode 100644 tensorflow/compiler/tf2xla/graph_compiler.h diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 647bfd1849..4da2ed722e 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -102,13 +102,11 @@ cc_library( "xla_helpers.cc", "xla_op_kernel.cc", "xla_op_registry.cc", - "graph_compiler.cc", "xla_cpu_backend.cc", ] + if_cuda_is_configured([ "xla_gpu_backend.cc", ]), hdrs = [ - "graph_compiler.h", "xla_compilation_device.h", "xla_compiler.h", "xla_context.h", diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc deleted file mode 100644 index c168266b16..0000000000 --- a/tensorflow/compiler/tf2xla/graph_compiler.cc +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/tf2xla/graph_compiler.h" - -#include -#include -#include - -#include "tensorflow/compiler/tf2xla/dump_graph.h" -#include "tensorflow/compiler/tf2xla/functionalize_control_flow.h" -#include "tensorflow/compiler/tf2xla/shape_util.h" -#include "tensorflow/compiler/tf2xla/type_util.h" -#include "tensorflow/compiler/tf2xla/xla_compilation_device.h" -#include "tensorflow/compiler/tf2xla/xla_context.h" -#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" -#include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/core/common_runtime/device.h" -#include "tensorflow/core/common_runtime/executor.h" -#include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/common_runtime/graph_optimizer.h" -#include "tensorflow/core/framework/attr_value_util.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/graph/algorithm.h" -#include "tensorflow/core/graph/graph_constructor.h" -#include "tensorflow/core/graph/node_builder.h" -#include "tensorflow/core/lib/hash/hash.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/public/version.h" - -namespace tensorflow { - -Status GraphCompiler::Compile() { - std::vector bindings(graph_->num_node_ids()); - std::vector topo_sorted_nodes; - // XLA requires determinism, generate a stable ordering from DFS. 
- GetReversePostOrder(*graph_, &topo_sorted_nodes, - /*stable_comparator=*/NodeComparatorID()); - - OpKernelContext::Params params; - PartiallySetupParams(¶ms); - - for (Node* n : topo_sorted_nodes) { - // Set up bindings. - NodeBinding& binding = bindings[n->id()]; - binding.node = n; - Status s = flib_->CreateKernel(n->def(), &binding.op_kernel); - binding.output_attrs.resize(n->num_outputs()); - if (!s.ok()) { - binding.op_kernel = nullptr; - s = AttachDef(s, *n); - LOG(ERROR) << "Executor failed to create kernel. " << s; - return s; - } - } - - // Bindings are initialized by the size of graph_->num_node_ids. However, the - // graph may contain dead nodes that still hold a valid node id. Thus - // graph_->num_node_ids could be larger than number of topo sorted nodes. - TF_RET_CHECK(bindings.size() >= topo_sorted_nodes.size()); - - for (Node* n : topo_sorted_nodes) { - TF_RET_CHECK(!n->IsRecv() && !n->IsSend() && !n->IsSwitch()) - << "Not supported node: " << n->DebugString(); - NodeBinding& binding = bindings[n->id()]; - params.op_kernel = binding.op_kernel; - params.output_attr_array = binding.output_attrs.data(); - - // tensor_inputs_ is a buffer reused across graph traversal. We clean up and - // reinitialize the buffer before we visit a new node. - tensor_inputs_.clear(); - tensor_inputs_.resize(n->num_inputs()); - - // Set up inputs from outputs of previous nodes. - for (auto* e : n->in_edges()) { - if (e->IsControlEdge()) continue; - Node* src = e->src(); - tensor_inputs_[e->dst_input()] = - bindings[src->id()].tensor_values[e->src_output()]; - } - - OpKernelContext op_context(¶ms, n->num_outputs()); - if (IsFunctional(n)) { - TF_RETURN_IF_ERROR(CompileFunctionalNode(n, &op_context)); - } else { - device_->Compute(CHECK_NOTNULL(params.op_kernel), &op_context); - Status s = op_context.status(); - TF_RETURN_IF_ERROR(s); - } - - // Set up outputs. Also check if outputs from the previous computation is - // valid. 
- for (int o = 0; o < n->num_outputs(); ++o) { - const auto tensor_val = op_context.release_output(o); - if (*op_context.is_output_dead() || tensor_val.tensor == nullptr) { - return errors::Internal("Missing xla_context ", o, "-th output from ", - (*op_context.is_output_dead() ? "(dead)" : ""), - SummarizeNode(*n)); - } - binding.tensor_values.push_back(tensor_val); - } - } - - // Clean up tensor data and op kernels. - for (NodeBinding& binding : bindings) { - delete binding.op_kernel; - for (auto& t : binding.tensor_values) { - if (!t.is_ref()) { - delete t.tensor; - } - } - } - return Status::OK(); -} - -bool GraphCompiler::IsFunctional(Node* n) { - return n->type_string() == FunctionLibraryDefinition::kGradientOp || - (flib_->GetFunctionLibraryDefinition()->Find(n->def().op()) != - nullptr); -} - -Status GraphCompiler::CompileFunctionalNode(Node* n, - OpKernelContext* op_context) { - TF_RET_CHECK(IsFunctional(n)); - // For functional nodes, compile them using compiler_ and call into the - // functions. - XlaOpKernelContext xla_op_context(op_context); - - std::vector arguments; - XlaCompiler::CompilationResult result; - NameAttrList func; - if (flib_->GetFunctionLibraryDefinition()->Find(n->def().op())) { - func.set_name(n->def().op()); - } else { - func.set_name(FunctionLibraryDefinition::kGradientOp); - } - *func.mutable_attr() = n->def().attr(); - - // Compile the graph using the function compiler. - TF_ASSIGN_OR_RETURN(auto computation, compiler_(func, &xla_op_context)); - XlaContext& context = XlaContext::Get(op_context); - auto* b = context.builder(); - - // Graph data handles from the inputs. - std::vector handles; - for (auto tensor : tensor_inputs_) { - auto expression = - reinterpret_cast(tensor->tensor_data().data()); - // TODO(yunxing): Support two rare cases below where input is a resource or - // contains a null handle. 
- TF_RET_CHECK(expression->resource() == nullptr) - << "Input with resource is not supported."; - TF_RET_CHECK(expression->handle().handle() != 0) - << "Invalid computation handle."; - handles.push_back(expression->handle()); - } - auto output_handle = b->Call(*computation, handles); - // The output handle of `Call` computation is a tuple type. Unzip it so - // that it can into fit future computations. - for (int64 idx = 0; idx < n->num_outputs(); ++idx) { - xla_op_context.SetOutput(idx, b->GetTupleElement(output_handle, idx)); - } - return b->first_error(); -} - -void GraphCompiler::PartiallySetupParams(OpKernelContext::Params* params) { - params->device = device_; - params->inputs = &tensor_inputs_; - params->step_container = step_container_; - params->resource_manager = device_->resource_manager(); -} - -} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/graph_compiler.h b/tensorflow/compiler/tf2xla/graph_compiler.h deleted file mode 100644 index 6fc0b18dcd..0000000000 --- a/tensorflow/compiler/tf2xla/graph_compiler.h +++ /dev/null @@ -1,103 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_TF2XLA_GRAPH_COMPILER_H_ -#define TENSORFLOW_COMPILER_TF2XLA_GRAPH_COMPILER_H_ - -#include "tensorflow/compiler/tf2xla/xla_compilation_device.h" -#include "tensorflow/compiler/tf2xla/xla_context.h" -#include "tensorflow/compiler/xla/client/local_client.h" -#include "tensorflow/core/common_runtime/device.h" -#include "tensorflow/core/common_runtime/device_mgr.h" -#include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/framework/function.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/notification.h" -#include "tensorflow/core/platform/thread_annotations.h" -#include "tensorflow/core/public/version.h" - -namespace tensorflow { - -using FunctionCompiler = - std::function>( - const NameAttrList& function, XlaOpKernelContext* xla_op_context)>; - -// GraphCompiler compiles the graph in topological order in the current -// thread. It also resolves the nondeterminism in the graph by enforcing a total -// order on all inputs to a node. This abstraction helps us create the same XLA -// computation given two structurally equivalent TensorFlow graphs. If a -// function call is visited during the graph traversal, it is then compiled -// through the FunctionCompiler into a computation and a `Call` operation is -// inserted to call into that computation. -class GraphCompiler { - public: - GraphCompiler(XlaContext* xla_context, XlaCompilationDevice* device, - Graph* graph, FunctionLibraryRuntime* flib, - ScopedStepContainer* step_container, - const FunctionCompiler& compiler) - : xla_context_(xla_context), - device_(device), - graph_(graph), - flib_(flib), - step_container_(step_container), - compiler_(compiler) {} - - // Compiles the graph. 
The results are written in `xla_context` that is passed - // into the compiler. - Status Compile(); - - private: - // NodeBinding is a wrapper on a `Node` that also contains computed - // TensorValue. - struct NodeBinding { - const Node* node; - // Kernel for this node, to be filled by CreateKernel. - OpKernel* op_kernel; - // Output values of this node. - std::vector tensor_values; - // Attributes of the outputs. - gtl::InlinedVector output_attrs; - }; - - // Partially sets params. This partially set params can be reused - // across multple nodes visit. - void PartiallySetupParams(OpKernelContext::Params* params); - - // Tests if a node is a functional node. A functional node represents a - // defined computation and should be compiled using `compiler_`. - bool IsFunctional(Node*); - - // Compiles a functional node and writes result to OpkernelContext. A - // functional node represents a defined computation and should be compiled - // using `compiler_`. - Status CompileFunctionalNode(Node*, OpKernelContext*); - - XlaContext* xla_context_; - XlaCompilationDevice* device_; - Graph* graph_; - FunctionLibraryRuntime* flib_; - ScopedStepContainer* step_container_; - FunctionCompiler compiler_; - // A buffer to hold tensor inputs to a node, this is reused across the graph - // traversal. - gtl::InlinedVector tensor_inputs_; -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_COMPILER_TF2XLA_GRAPH_COMPILER_H_ diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 9e405578aa..8521d4167a 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -20,12 +20,10 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/dump_graph.h" #include "tensorflow/compiler/tf2xla/functionalize_control_flow.h" -#include "tensorflow/compiler/tf2xla/graph_compiler.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_compilation_device.h" #include "tensorflow/compiler/tf2xla/xla_context.h" -#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/xla/client/client_library.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/executor.h" @@ -180,34 +178,9 @@ Status XlaCompiler::CompileFunction( namespace { -// Builds XlaCompiler argument descriptions `args` from `ctx`. -Status MakeXlaCompilerArgumentsFromInputs( - XlaOpKernelContext* ctx, std::vector* args) { - VLOG(2) << "Num inputs " << ctx->num_inputs(); - args->resize(ctx->num_inputs()); - for (int i = 0; i < ctx->num_inputs(); ++i) { - VLOG(2) << " Input " << i - << " type: " << DataTypeString(ctx->input_type(i)) - << " shape: " << ctx->InputShape(i).DebugString(); - XlaCompiler::Argument& arg = (*args)[i]; - DataType type = ctx->input_type(i); - - if (type == DT_RESOURCE) { - return errors::InvalidArgument( - "Resource as function argument is not yet implemented."); - } else { - arg.kind = XlaCompiler::Argument::kParameter; - arg.type = ctx->input_type(i); - TF_RETURN_IF_ERROR( - TensorShapeToXLAShape(arg.type, ctx->InputShape(i), &arg.shape)); - } - } - return Status::OK(); -} - -Status ExecuteGraph(XlaCompiler* compiler, XlaContext* xla_context, - std::unique_ptr graph, XlaCompilationDevice* device, - FunctionLibraryRuntime* flib, int64 step_id) { +Status ExecuteGraph(XlaContext* xla_context, std::unique_ptr graph, + XlaCompilationDevice* device, FunctionLibraryRuntime* flib, + int64 step_id) { // Resource cleanup is a bit messy. XlaContext is a ref-counted resource; the // resource manager takes ownership via Create, and unrefs via Cleanup. 
We // explicitly add a reference to ensure the refcount at entry is maintained at @@ -224,27 +197,56 @@ Status ExecuteGraph(XlaCompiler* compiler, XlaContext* xla_context, TF_RETURN_IF_ERROR(device->resource_manager()->Create( step_container->name(), XlaContext::kXlaContextResourceName, xla_context)); - // Compile_func is used to tell the serial executor how to compile a function. - auto compile_func = [&](const NameAttrList& function, - XlaOpKernelContext* xla_op_context) - -> xla::StatusOr> { - std::vector arguments; - - TF_RETURN_IF_ERROR( - MakeXlaCompilerArgumentsFromInputs(xla_op_context, &arguments)); - - XlaCompiler::CompilationResult result; - TF_RETURN_IF_ERROR(compiler->CompileFunction(XlaCompiler::CompileOptions(), - function, arguments, &result)); - return result.computation; + + // Create a LocalExecutor that will own and run the graph. + // TODO(b/66947550): migrate away from using an Executor in order to guarantee + // determinism and thread-safety. + LocalExecutorParams exec_params; + exec_params.device = device; + exec_params.function_library = flib; + exec_params.create_kernel = [flib](const NodeDef& ndef, OpKernel** kernel) { + return flib->CreateKernel(ndef, kernel); + }; + exec_params.delete_kernel = [](OpKernel* kernel) { delete kernel; }; + Executor* exec_ptr = nullptr; + TF_RETURN_IF_ERROR(NewLocalExecutor(exec_params, graph.release(), &exec_ptr)); + std::unique_ptr exec(exec_ptr); + // At this point ownership of the graph has been transferred to exec. + + // Run the graph symbolically, turning the graph into an XLA computation. + Executor::Args exec_args; + exec_args.step_id = step_id; + exec_args.step_container = step_container.get(); + + // Pushes closures to run onto `worklist`. We don't run the closures directly + // from 'runner' since that might lead to a stack overflow for large graphs. 
+ std::deque worklist; + exec_args.runner = [&](Executor::Args::Closure c) { + worklist.push_back(std::move(c)); }; - GraphCompiler graph_compiler(xla_context, device, graph.get(), flib, - step_container.get(), compile_func); - TF_RETURN_IF_ERROR(graph_compiler.Compile()); + // The following code assumes there is only one thread involved and no + // concurrency, because we did not provide Executor a threaded runner. Async + // ops on the XlaCompilation device must not use threads or concurrency + // internally. + bool done = false; + exec->RunAsync(exec_args, [&](const Status& s) { + status = s; + done = true; + }); + // Repeatedly run closures from the worklist until `done` is signalled. + while (!done) { + TF_RET_CHECK(!worklist.empty()); + Executor::Args::Closure& c = worklist.front(); + c(); + worklist.pop_front(); + } + TF_RETURN_WITH_CONTEXT_IF_ERROR( + status, "Conversion from TensorFlow graph to XLA computation failed."); + // Explicitly clean up the step container, to capture the cleanup status. step_container.reset(); - return Status::OK(); + return status; } // Builds XLA computations for each of the arguments to the computation. 
@@ -492,7 +494,7 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, &result->input_mapping, &result->xla_input_shapes)); context->set_args(std::move(arg_expressions)); - TF_RETURN_IF_ERROR(ExecuteGraph(this, context, std::move(graph), device_, + TF_RETURN_IF_ERROR(ExecuteGraph(context, std::move(graph), device_, flib_runtime_, NextStepId())); int num_nonconst_outputs; diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index 0435c619f8..35159dbad4 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -240,7 +240,7 @@ class XlaCompiler { bool use_tuple_arg = false; // If 'return_updated_values_for_all_resources' is true, then updated - // values of all resource arguments will be included in the + // values of all resource resources arguments will be included in the // 'resource_updates' of the computation, even if the resource was not // modified by the computation. Used when compiling loop bodies to ensure // the input and output signatures match. 
diff --git a/tensorflow/compiler/tf2xla/xla_compiler_test.cc b/tensorflow/compiler/tf2xla/xla_compiler_test.cc index 88ed3b89a6..531725a623 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler_test.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler_test.cc @@ -63,7 +63,6 @@ class DummyReadResourceOp : public XlaOpKernel { dummy->Unref(); ctx->SetOutput(0, ctx->Input(0)); - ctx->SetOutput(1, ctx->Input(0)); } }; @@ -81,25 +80,22 @@ class DummyReadResourceCC { if (!scope.ok()) return; scope.UpdateStatus(scope.DoShapeInference(ret)); if (!scope.ok()) return; - this->output1_ = Output(ret, 0); - this->output2_ = Output(ret, 1); + this->output_ = Output(ret, 0); } + Node* node() const { return output_.node(); } - Output output1_; - Output output2_; + Output output_; }; REGISTER_OP("DummyReadResource") .Input("input: int32") - .Output("output1: int32") - .Output("output2: int32") + .Output("output: int32") .SetShapeFn(shape_inference::UnknownShape) .Doc(R"doc( A dummy Op. input: dummy input. -output1: dummy output. -output2: dummy output. +output: dummy output. )doc"); REGISTER_XLA_OP(Name("DummyReadResource"), DummyReadResourceOp); @@ -320,8 +316,7 @@ TEST_F(XlaCompilerTest, ResourceManager) { Scope scope = Scope::NewRootScope().ExitOnError(); auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0); auto b = DummyReadResourceCC(scope.WithOpName("B"), a); - auto c = ops::Add(scope.WithOpName("C"), b.output2_, b.output1_); - auto d = ops::_Retval(scope.WithOpName("D"), c, 0); + auto c = ops::_Retval(scope.WithOpName("C"), b.output_, 0); std::unique_ptr graph(new Graph(OpRegistry::Global())); TF_ASSERT_OK(scope.ToGraph(graph.get())); @@ -354,58 +349,6 @@ TEST_F(XlaCompilerTest, ResourceManager) { resource->Unref(); } -// Tests compilation and execution of a graph that adds two tensors. -TEST_F(XlaCompilerTest, DeterministicCompilation) { - // Builds a graph that contains a node with two output edges. The compiler - // should always traverse them in the same order. 
- const int64 test_count = 2; - - std::vector results(test_count); - - for (int64 i = 0; i < test_count; ++i) { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0); - auto b = ops::Neg(scope.WithOpName("B"), a); - auto c = ops::Neg(scope.WithOpName("C"), a); - auto d = ops::Add(scope.WithOpName("D"), b, c); - auto e = ops::_Retval(scope.WithOpName("E"), d, 0); - std::unique_ptr graph(new Graph(OpRegistry::Global())); - TF_ASSERT_OK(scope.ToGraph(graph.get())); - - // Builds a description of the argument. - std::vector args(1); - args[0].kind = XlaCompiler::Argument::kParameter; - args[0].type = DT_INT32; - args[0].shape = xla::ShapeUtil::MakeShape(xla::S32, {2}); - - // Compiles the graph. - auto options = DefaultOptions(); - XlaCompiler compiler(options); - - TF_ASSERT_OK(compiler.CompileGraph(XlaCompiler::CompileOptions(), "dummy", - std::move(graph), args, &results[i])); - } - - for (int64 i = 1; i < test_count; ++i) { - auto m1 = - results[i - 1].computation->Snapshot().ValueOrDie()->entry().requests(); - auto m2 = - results[i].computation->Snapshot().ValueOrDie()->entry().requests(); - // Check if every entry is the same. - for (auto& entry1 : m1) { - int64 key = entry1.first; - auto value1 = entry1.second; - auto entry2 = m2.find(key); - auto value2 = entry2->second; - EXPECT_TRUE(entry2 != m2.end()); - string str1, str2; - value1.AppendToString(&str1); - value2.AppendToString(&str2); - EXPECT_EQ(str1, str2); - } - } -} - // Tests a computation that receives a TensorArray resource as input and // updates it. 
TEST_F(XlaCompilerTest, CanPassTensorArraysToAndFromComputation) { diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index d279e1f50f..bd7898a41f 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -187,9 +187,8 @@ tensorflow::Status Service::Computation(const ComputationRequest* arg, *result->mutable_computation() = computation_tracker_.NewComputation(arg->name()); - VLOG(1) << Printf("Created new computation %s on service %p, name %s", - result->computation().ShortDebugString().c_str(), this, - arg->name().c_str()); + VLOG(1) << Printf("Created new computation %s on service %p", + result->computation().ShortDebugString().c_str(), this); return tensorflow::Status::OK(); } diff --git a/tensorflow/core/graph/algorithm.cc b/tensorflow/core/graph/algorithm.cc index 6ef51aa7df..3bfba3fc4e 100644 --- a/tensorflow/core/graph/algorithm.cc +++ b/tensorflow/core/graph/algorithm.cc @@ -24,8 +24,7 @@ limitations under the License. namespace tensorflow { void DFS(const Graph& g, const std::function& enter, - const std::function& leave, - const NodeComparator& stable_comparator) { + const std::function& leave) { // Stack of work to do. struct Work { Node* node; @@ -52,41 +51,24 @@ void DFS(const Graph& g, const std::function& enter, // Arrange to call leave(n) when all done with descendants. if (leave) stack.push_back(Work{n, true}); - gtl::iterator_range nodes = n->out_nodes(); - auto add_work = [&visited, &stack](Node* out) { + // Arrange to work on descendants. + for (Node* out : n->out_nodes()) { if (!visited[out->id()]) { // Note; we must not mark as visited until we actually process it. 
stack.push_back(Work{out, false}); } - }; - - if (stable_comparator) { - std::vector nodes_sorted; - for (Node* out : nodes) { - nodes_sorted.emplace_back(out); - } - std::sort(nodes_sorted.begin(), nodes_sorted.end(), stable_comparator); - for (Node* out : nodes_sorted) { - add_work(out); - } - } else { - for (Node* out : nodes) { - add_work(out); - } } } } void ReverseDFS(const Graph& g, const std::function& enter, - const std::function& leave, - const NodeComparator& stable_comparator) { - ReverseDFSFrom(g, {g.sink_node()}, enter, leave, stable_comparator); + const std::function& leave) { + ReverseDFSFrom(g, {g.sink_node()}, enter, leave); } void ReverseDFSFrom(const Graph& g, gtl::ArraySlice start, const std::function& enter, - const std::function& leave, - const NodeComparator& stable_comparator) { + const std::function& leave) { // Stack of work to do. struct Work { Node* node; @@ -115,41 +97,23 @@ void ReverseDFSFrom(const Graph& g, gtl::ArraySlice start, // Arrange to call leave(n) when all done with descendants. if (leave) stack.push_back(Work{n, true}); - gtl::iterator_range nodes = n->in_nodes(); - - auto add_work = [&visited, &stack](Node* out) { - if (!visited[out->id()]) { + // Arrange to work on parents. + for (Node* in : n->in_nodes()) { + if (!visited[in->id()]) { // Note; we must not mark as visited until we actually process it. 
- stack.push_back(Work{out, false}); - } - }; - - if (stable_comparator) { - std::vector nodes_sorted; - for (Node* in : nodes) { - nodes_sorted.emplace_back(in); - } - std::sort(nodes_sorted.begin(), nodes_sorted.end(), stable_comparator); - for (Node* in : nodes_sorted) { - add_work(in); - } - } else { - for (Node* in : nodes) { - add_work(in); + stack.push_back(Work{in, false}); } } } } -void GetPostOrder(const Graph& g, std::vector* order, - const NodeComparator& stable_comparator) { +void GetPostOrder(const Graph& g, std::vector* order) { order->clear(); - DFS(g, nullptr, [order](Node* n) { order->push_back(n); }, stable_comparator); + DFS(g, nullptr, [order](Node* n) { order->push_back(n); }); } -void GetReversePostOrder(const Graph& g, std::vector* order, - const NodeComparator& stable_comparator) { - GetPostOrder(g, order, stable_comparator); +void GetReversePostOrder(const Graph& g, std::vector* order) { + GetPostOrder(g, order); std::reverse(order->begin(), order->end()); } diff --git a/tensorflow/core/graph/algorithm.h b/tensorflow/core/graph/algorithm.h index 5bb6041d98..01d36e0a12 100644 --- a/tensorflow/core/graph/algorithm.h +++ b/tensorflow/core/graph/algorithm.h @@ -25,50 +25,24 @@ limitations under the License. namespace tensorflow { -// Comparator for two nodes. This is used in order to get a stable ording. -using NodeComparator = std::function; - -// Compares two node based on their ids. -struct NodeComparatorID { - bool operator()(const Node* n1, const Node* n2) const { - return n1->id() < n2->id(); - } -}; - -// Compare two nodes based on their names. -struct NodeComparatorName { - bool operator()(const Node* n1, const Node* n2) const { - return n1->name() < n2->name(); - } -}; - // Perform a depth-first-search on g starting at the source node. // If enter is not empty, calls enter(n) before visiting any children of n. // If leave is not empty, calls leave(n) after visiting all children of n. 
-// If stable_comparator is set, a stable ordering of visit is achieved by -// sorting a node's neighbors first before visiting them. extern void DFS(const Graph& g, const std::function& enter, - const std::function& leave, - const NodeComparator& stable_comparator = {}); + const std::function& leave); // Perform a reverse depth-first-search on g starting at the sink node. // If enter is not empty, calls enter(n) before visiting any parents of n. // If leave is not empty, calls leave(n) after visiting all parents of n. -// If stable_comparator is set, a stable ordering of visit is achieved by -// sorting a node's neighbors first before visiting them. extern void ReverseDFS(const Graph& g, const std::function& enter, - const std::function& leave, - const NodeComparator& stable_comparator = {}); + const std::function& leave); // Perform a reverse depth-first-search on g starting at the 'start' nodes. // If enter is not empty, calls enter(n) before visiting any parents of n. // If leave is not empty, calls leave(n) after visiting all parents of n. -// If stable_comparator is set, a stable ordering of visit is achieved by -// sorting a node's neighbors first before visiting them. extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice start, const std::function& enter, - const std::function& leave, - const NodeComparator& stable_comparator = {}); + const std::function& leave); // Stores in *order the post-order numbering of all nodes // in graph found via a depth first search starting at the source node. @@ -76,18 +50,11 @@ extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice start, // Note that this is equivalent to reverse topological sorting when the // graph does not have cycles. // -// If stable_comparator is set, a stable ordering of visit is achieved by -// sorting a node's neighbors first before visiting them. -// // REQUIRES: order is not NULL. 
-void GetPostOrder(const Graph& g, std::vector* order, - const NodeComparator& stable_comparator = {}); +void GetPostOrder(const Graph& g, std::vector* order); // Stores in *order the reverse post-order numbering of all nodes -// If stable_comparator is set, a stable ordering of visit is achieved by -// sorting a node's neighbors first before visiting them. -void GetReversePostOrder(const Graph& g, std::vector* order, - const NodeComparator& stable_comparator = {}); +void GetReversePostOrder(const Graph& g, std::vector* order); // Prune nodes in "g" that are not in some path from the source node // to any node in 'nodes'. Returns true if changes were made to the graph. diff --git a/tensorflow/core/graph/algorithm_test.cc b/tensorflow/core/graph/algorithm_test.cc index 0cdcdb6685..a529760426 100644 --- a/tensorflow/core/graph/algorithm_test.cc +++ b/tensorflow/core/graph/algorithm_test.cc @@ -112,40 +112,5 @@ TEST(AlgorithmTest, ReversePostOrder) { EXPECT_FALSE(ExpectBefore(orders, order, &error)); } -TEST(AlgorithmTest, ReversePostOrderStable) { - int64 run_count = 100; - using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - - for (int64 i = 0; i < run_count; ++i) { - // One source of nondeterminism comes from unordered set with key of a - // pointer type, for example the order of FlatSet depends on the - // raw pointer value of Node. Stable post order suppose to remove this - // nondeterminism by enforcing an ordering based on node ids. 
- GraphDefBuilder b(GraphDefBuilder::kFailImmediately); - string error; - Node* w1 = SourceOp("TestParams", b.opts().WithName("W1")); - Node* input = - SourceOp("TestInput", b.opts().WithName("input").WithControlInput(w1)); - BinaryOp("TestMul", w1, {input, 1}, b.opts().WithName("t2")); - // Insert different number of nodes between the allocation of t2 and t3, - // this creates enough entropy in the memory distance between t2 and t3 thus - // forces them to have randomized ordering had stable DFS was not - // implemented correctly. - for (int64 j = 0; j < i; ++j) { - BinaryOp("TestMul", w1, {input, 1}, - b.opts().WithName(strings::StrCat("internal", j))); - } - - BinaryOp("TestMul", w1, {input, 1}, b.opts().WithName("t3")); - - Graph g(OpRegistry::Global()); - TF_ASSERT_OK(b.ToGraph(&g)); - std::vector order; - - // Test reverse post order generates expected ordering. - GetReversePostOrder(g, &order, /*stable_comparator=*/NodeComparatorID()); - EXPECT_TRUE(ExpectBefore({{"t3", "t2"}}, order, &error)); - } -} } // namespace } // namespace tensorflow diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 54076ed1ab..5a31a6216b 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -298,12 +298,12 @@ class Edge { Node* dst() const { return dst_; } int id() const { return id_; } - // Return the index of the source output that produces the data + // Return the number of the source output that produces the data // carried by this edge. The special value kControlSlot is used // for control dependencies. int src_output() const { return src_output_; } - // Return the index of the destination input that consumes the data + // Return the number of the destination input that consumes the data // carried by this edge. The special value kControlSlot is used // for control dependencies. int dst_input() const { return dst_input_; } -- GitLab From 9aad24f89ee9fbaa31f36087ec5fc527d7b728b6 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 6 Oct 2017 10:54:59 -0700 Subject: [PATCH 115/909] One last data_set race condition fix. PiperOrigin-RevId: 171313226 --- .../tensor_forest/kernels/stats_ops.cc | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc b/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc index b6d57ef952..f80a34ece6 100644 --- a/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc +++ b/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc @@ -235,9 +235,6 @@ class ProcessInputOp : public OpKernel { string serialized_proto; OP_REQUIRES_OK(context, context->GetAttr("input_spec", &serialized_proto)); input_spec_.ParseFromString(serialized_proto); - - data_set_ = std::unique_ptr( - new TensorDataSet(input_spec_, random_seed_)); } void Compute(OpKernelContext* context) override { @@ -249,8 +246,9 @@ class ProcessInputOp : public OpKernel { const Tensor& input_weights = context->input(7); const Tensor& leaf_ids_tensor = context->input(8); - data_set_->set_input_tensors(input_data, sparse_input_indices, - sparse_input_values, sparse_input_shape); + std::unique_ptr data_set(new TensorDataSet(input_spec_, 0)); + data_set->set_input_tensors(input_data, sparse_input_indices, + sparse_input_values, sparse_input_shape); FertileStatsResource* fertile_stats_resource; OP_REQUIRES_OK(context, LookupResource(context, HandleFromInput(context, 1), @@ -264,7 +262,7 @@ class ProcessInputOp : public OpKernel { core::ScopedUnref unref_stats(fertile_stats_resource); core::ScopedUnref unref_tree(tree_resource); - const int32 num_data = data_set_->NumItems(); + const int32 num_data = data_set->NumItems(); auto worker_threads = context->device()->tensorflow_cpu_worker_threads(); int num_threads = worker_threads->num_threads; @@ -308,23 +306,23 @@ class ProcessInputOp : public OpKernel { // from a digits run on local desktop. Heuristics might be necessary // if it really matters that much. 
const int64 costPerUpdate = 1000; - auto update = [this, &target, &leaf_ids_tensor, &num_targets, + auto update = [this, &target, &leaf_ids_tensor, &num_targets, &data_set, fertile_stats_resource, &locks, &set_lock, &ready_to_split, num_data](int64 start, int64 end) { CHECK(start <= end); CHECK(end <= num_data); - UpdateStats(fertile_stats_resource, data_set_, target, num_targets, + UpdateStats(fertile_stats_resource, data_set, target, num_targets, leaf_ids_tensor, &locks, &set_lock, static_cast(start), static_cast(end), &ready_to_split); }; auto update_collated = [this, &target, &num_targets, fertile_stats_resource, tree_resource, &leaf_examples, &set_lock, - &ready_to_split, + &ready_to_split, &data_set, num_leaves](int64 start, int64 end) { CHECK(start <= end); CHECK(end <= num_leaves); - UpdateStatsCollated(fertile_stats_resource, tree_resource, data_set_, + UpdateStatsCollated(fertile_stats_resource, tree_resource, data_set, target, num_targets, leaf_examples, &set_lock, static_cast(start), static_cast(end), &ready_to_split); @@ -350,7 +348,6 @@ class ProcessInputOp : public OpKernel { private: int32 random_seed_; tensorforest::TensorForestDataSpec input_spec_; - std::unique_ptr data_set_; TensorForestParams param_proto_; }; -- GitLab From dc500c869721e93ae1f3036b677a1d9d424e9d23 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 6 Oct 2017 11:03:06 -0700 Subject: [PATCH 116/909] [TF2XLA] Update device name in convert and redo check that name parsing is correct. * Update ConvertGraphToXla to use the new form for setting the assigned device name. * Remove some stale comments. * Revert workaround that allowed the requested device name to not be parsed. 
PiperOrigin-RevId: 171314671 --- tensorflow/compiler/tf2xla/tf2xla.cc | 5 ++-- .../compiler/tf2xla/xla_compilation_device.cc | 23 +++++++++++-------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/tf2xla/tf2xla.cc b/tensorflow/compiler/tf2xla/tf2xla.cc index b7213a6cc1..a14c93a2b9 100644 --- a/tensorflow/compiler/tf2xla/tf2xla.cc +++ b/tensorflow/compiler/tf2xla/tf2xla.cc @@ -255,11 +255,10 @@ Status CreateXlaArgs(const Graph& graph, Status ConvertGraphToXla(std::unique_ptr graph, xla::Client* client, xla::Computation* computation, bool* requires_runtime_context) { - // Create a device and context to convert the graph into an XLA computation. XlaOpRegistry::RegisterCompilationKernels(); - // Populate the context with args from the graph. for (Node* node : graph->nodes()) { - node->set_assigned_device_name(DEVICE_CPU_XLA_JIT); + node->set_assigned_device_name( + strings::StrCat("/device:", DEVICE_CPU_XLA_JIT)); } std::vector xla_args; TF_RETURN_IF_ERROR(CreateXlaArgs(*graph, &xla_args)); diff --git a/tensorflow/compiler/tf2xla/xla_compilation_device.cc b/tensorflow/compiler/tf2xla/xla_compilation_device.cc index 3814a2b8b9..890a9ccb83 100644 --- a/tensorflow/compiler/tf2xla/xla_compilation_device.cc +++ b/tensorflow/compiler/tf2xla/xla_compilation_device.cc @@ -98,17 +98,20 @@ void XlaCompilationDevice::Compute(OpKernel* op_kernel, b->SetOpMetadata(metadata); DeviceNameUtils::ParsedName parsed; - if (DeviceNameUtils::ParseFullName(op_kernel->requested_device(), &parsed)) { - // If no device ID assignment is found, XLA is free to use whatever device - // it wants. In practice this usually has the effect of placing things on - // device 0. 
- xla::OpDeviceAssignment assignment; - if (parsed.has_id) { - assignment.set_has_device(true); - assignment.set_device(parsed.id); - } - b->SetDeviceAssignment(assignment); + OP_REQUIRES( + context, + DeviceNameUtils::ParseFullName(op_kernel->requested_device(), &parsed), + errors::Internal("Unable to parse device name: ", + op_kernel->requested_device())); + xla::OpDeviceAssignment assignment; + // If no device ID assignment is found, XLA is free to use whatever device it + // wants. In practice this usually has the effect of placing things on + // device 0. + if (parsed.has_id) { + assignment.set_has_device(true); + assignment.set_device(parsed.id); } + b->SetDeviceAssignment(assignment); op_kernel->Compute(context); -- GitLab From 71a285922a4279fd35f73271e09b90d5787746a9 Mon Sep 17 00:00:00 2001 From: James Qin Date: Fri, 6 Oct 2017 11:04:52 -0700 Subject: [PATCH 117/909] Fix a minor issue w/ allreduce PiperOrigin-RevId: 171314944 --- tensorflow/contrib/all_reduce/python/all_reduce.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/all_reduce/python/all_reduce.py b/tensorflow/contrib/all_reduce/python/all_reduce.py index 8e7f1791b8..22d7633ce2 100644 --- a/tensorflow/contrib/all_reduce/python/all_reduce.py +++ b/tensorflow/contrib/all_reduce/python/all_reduce.py @@ -762,6 +762,8 @@ def _reduce_non_singleton(input_tensors, red_f, un_op): if len(input_tensors) > 1: return red_f(input_tensors) else: + if not un_op: + return input_tensors output_tensors = [] for t in input_tensors: with ops.colocate_with(t): @@ -835,7 +837,7 @@ def _build_shuffle_hybrid(input_tensors, gather_devices, red_op, upper_level_f): def build_shuffle_then_ring(input_tensors, gather_devices, subdiv, - red_n_op, red_op, un_op): + red_n_op, red_op, un_op=None): """Construct hybrid of Shuffle within workers, Ring across workers.""" def upper_builder(tensors): return build_ring_all_reduce(tensors, len(tensors), subdiv, [0], -- GitLab From 
b99457c2138482470ae976a6364ce0ba754503cf Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Fri, 6 Oct 2017 11:06:12 -0700 Subject: [PATCH 118/909] [XLA] Fix a bug in ComputationBuilder::Collapse and add more tests/docs. Also updated test infrastructure so a shape mismatch does not cause a fatal crash in index_util, but rather reports an appropriate test failure message. PiperOrigin-RevId: 171315165 --- tensorflow/compiler/xla/client/client.cc | 1 + .../xla/client/computation_builder.cc | 13 ++++ .../compiler/xla/client/computation_builder.h | 10 +++ .../compiler/xla/service/shape_inference.cc | 9 ++- .../compiler/xla/tests/literal_test_util.cc | 73 ++++++++++++++----- .../compiler/xla/tests/literal_test_util.h | 2 + tensorflow/compiler/xla/tests/reshape_test.cc | 18 ++++- 7 files changed, 105 insertions(+), 21 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index 387253617e..7db2ea79fb 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -206,6 +206,7 @@ StatusOr> Client::Execute( *request.mutable_execution_options() = *execution_options; } for (GlobalData* argument : arguments) { + CHECK(argument != nullptr) << "Argument pointers must not be null."; *request.add_arguments() = argument->handle(); } diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 15a713513f..925dcd36c0 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -489,6 +489,16 @@ ComputationDataHandle ComputationBuilder::Collapse( } std::unique_ptr original_shape = shape_or_status.ConsumeValueOrDie(); + VLOG(3) << "original shape: " << ShapeUtil::HumanString(*original_shape); + VLOG(3) << "dims to collapse: " + << tensorflow::str_util::Join(dims_to_collapse, ","); + + if (dims_to_collapse.size() <= 1) { + // Not collapsing anything, trivially 
we can return the operand versus + // enqueueing a trivial reshape. + return operand; + } + std::vector new_sizes; for (int i = 0; i < ShapeUtil::Rank(*original_shape); ++i) { if (i <= dims_to_collapse.front() || i > dims_to_collapse.back()) { @@ -498,6 +508,9 @@ ComputationDataHandle ComputationBuilder::Collapse( } } + VLOG(3) << "new sizes: [" << tensorflow::str_util::Join(new_sizes, ",") + << "]"; + return Reshape(operand, new_sizes); } diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 73972c1290..7014685ea5 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -201,6 +201,16 @@ class ComputationBuilder { // {x=1024, y=32} by collapsing dims {0, 1, 2}. Collapsing dimensions must // be a consecutive, in-order subsequence of the operand dimensions. // + // Note that collapsing a single dimension does nothing: + // + // {256} collapsing {0} => {256} + // {1} collapsing {0} => {1} + // + // Collapsing multiple dimensions produces a single result dimension: + // + // {256, 2} collapsing {0,1} => {512} + // {256, 2, 3} collapsing {0,1} => {512, 3} + // // This could potentially cause data to be moved -- it provides a more // structured form of reshaping than an arbitrary Reshape operation. 
ComputationDataHandle Collapse(const ComputationDataHandle& operand, diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index ffd8018827..29221d2d29 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1894,11 +1894,16 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( Shape inferred_shape = ShapeUtil::MakeShape(operand.element_type(), new_sizes); + VLOG(3) << "Reshape inferred shape: " + << ShapeUtil::HumanString(inferred_shape); if (ShapeUtil::ElementsIn(operand) != ShapeUtil::ElementsIn(inferred_shape)) { return InvalidArgument( - "reshape operation has mismatched element counts: from=%lld to=%lld", - ShapeUtil::ElementsIn(operand), ShapeUtil::ElementsIn(inferred_shape)); + "reshape operation has mismatched element counts: from=%lld (%s) " + "to=%lld (%s)", + ShapeUtil::ElementsIn(operand), ShapeUtil::HumanString(operand).c_str(), + ShapeUtil::ElementsIn(inferred_shape), + ShapeUtil::HumanString(inferred_shape).c_str()); } std::vector indices(ShapeUtil::Rank(operand)); diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 061a4e190f..2876a79dd8 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -39,30 +39,60 @@ limitations under the License. namespace xla { -/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, - const Shape& actual) { - ASSERT_EQ(ShapeUtil::IsTuple(expected), ShapeUtil::IsTuple(actual)); +/* static */ ::testing::AssertionResult LiteralTestUtil::EqualShapes( + const Shape& expected, const Shape& actual) { + if (ShapeUtil::IsTuple(expected) != ShapeUtil::IsTuple(actual)) { + return ::testing::AssertionFailure() + << "tupleness-mismatch! 
want: " << ShapeUtil::HumanString(expected) + << " got: " << ShapeUtil::HumanString(actual); + } if (ShapeUtil::IsTuple(expected)) { - ASSERT_EQ(ShapeUtil::TupleElementCount(expected), - ShapeUtil::TupleElementCount(actual)); + if (ShapeUtil::TupleElementCount(expected) != + ShapeUtil::TupleElementCount(actual)) { + return ::testing::AssertionFailure() + << "want tuple element count: " + << ShapeUtil::TupleElementCount(expected) + << " got tuple element count: " + << ShapeUtil::TupleElementCount(actual); + } for (int i = 0; i < expected.tuple_shapes_size(); ++i) { - AssertEqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); + ::testing::AssertionResult result = + EqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); + if (!result) { + return result; + } } } else { - ASSERT_EQ(ShapeUtil::Rank(expected), ShapeUtil::Rank(actual)) - << "want rank of: " << ShapeUtil::HumanString(expected) - << " got rank of: " << ShapeUtil::HumanString(actual); - ASSERT_EQ(expected.element_type(), actual.element_type()) - << PrimitiveType_Name(expected.element_type()) << " vs " - << PrimitiveType_Name(actual.element_type()); - ASSERT_EQ(expected.dimensions_size(), actual.dimensions_size()); + if (ShapeUtil::Rank(expected) != ShapeUtil::Rank(actual)) { + return ::testing::AssertionFailure() + << "want rank of: " << ShapeUtil::HumanString(expected) + << " got rank of: " << ShapeUtil::HumanString(actual); + } + if (expected.element_type() != actual.element_type()) { + return ::testing::AssertionFailure() + << PrimitiveType_Name(expected.element_type()) << " vs " + << PrimitiveType_Name(actual.element_type()); + } + if (expected.dimensions_size() != actual.dimensions_size()) { + return ::testing::AssertionFailure() + << "want dimensions_size " << expected.dimensions_size() + << " got dimensions_size " << actual.dimensions_size(); + } for (int i = 0; i < expected.dimensions_size(); ++i) { - ASSERT_EQ(expected.dimensions(i), actual.dimensions(i)) - << "mismatch in dimension 
#" << i - << " expected: " << ShapeUtil::HumanString(expected) - << " actual: " << ShapeUtil::HumanString(actual); + if (expected.dimensions(i) != actual.dimensions(i)) { + return ::testing::AssertionFailure() + << "mismatch in dimension #" << i + << " expected: " << ShapeUtil::HumanString(expected) + << " actual: " << ShapeUtil::HumanString(actual); + } } } + return ::testing::AssertionSuccess(); +} + +/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, + const Shape& actual) { + ASSERT_TRUE(EqualShapes(expected, actual)); } /* static */ void LiteralTestUtil::AssertEqualShapesAndLayouts( @@ -265,7 +295,14 @@ class NearComparator { VLOG(1) << "actual:"; XLA_VLOG_LINES(1, actual.ToString()); - LiteralTestUtil::AssertEqualShapes(expected.shape(), actual.shape()); + // If the shapes mismatch, we simply fail the expectation instead of + // printing out data, as it's a type error rather than a value error. + ::testing::AssertionResult equal_shapes = + LiteralTestUtil::EqualShapes(expected.shape(), actual.shape()); + if (!equal_shapes) { + EXPECT_TRUE(equal_shapes); + return false; + } // Set up members used during the comparison. num_miscompares_ = 0; diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h index f645c4e8dc..467d44b857 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.h +++ b/tensorflow/compiler/xla/tests/literal_test_util.h @@ -50,6 +50,8 @@ class LiteralTestUtil { public: // Asserts that the given shapes have the same rank, dimension sizes, and // primitive types. 
+ static ::testing::AssertionResult EqualShapes(const Shape& expected, + const Shape& actual); static void AssertEqualShapes(const Shape& expected, const Shape& actual); // Asserts that the provided shapes are equal as defined in AssertEqualShapes diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc index bb7160e3a0..72c68f24a0 100644 --- a/tensorflow/compiler/xla/tests/reshape_test.cc +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -47,7 +47,7 @@ class ReshapeTest : public ClientLibraryTestBase { }; // Collapses 2-dimensional pseudo-scalar (single-element array) to 1 dimension. -XLA_TEST_F(ReshapeTest, Trivial1x1) { +XLA_TEST_F(ReshapeTest, CollapseTrivial1x1) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR2({{1.0}}); builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1}); @@ -55,6 +55,22 @@ XLA_TEST_F(ReshapeTest, Trivial1x1) { ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); } +XLA_TEST_F(ReshapeTest, CollapseTrivialR1EmptyDims) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({1.0}); + builder.Collapse(/*operand=*/a, /*dimensions=*/{}); + + ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); +} + +XLA_TEST_F(ReshapeTest, CollapseTrivialR1OnlyDim) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({1.0}); + builder.Collapse(/*operand=*/a, /*dimensions=*/{0}); + + ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); +} + // Collapses 2-dimensional pseudo-scalar (single-element array) to scalar. XLA_TEST_F(ReshapeTest, SingleElementArrayToScalar) { ComputationBuilder builder(client_, TestName()); -- GitLab From 32e044d333e85d535a27a3729ed836855383be1b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 11:24:00 -0700 Subject: [PATCH 119/909] Fix stats_collector_ null pointer error. 
PiperOrigin-RevId: 171318477 --- tensorflow/core/common_runtime/executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index f57834cfbe..11e063d8d2 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -2008,7 +2008,7 @@ bool ExecutorState::NodeDone(const Status& s, const Node* node, NodeExecStatsWrapper* stats, TaggedNodeReadyQueue* inline_ready) { nodestats::SetAllEnd(stats); - if (!SetTimelineLabel(node, stats)) { + if (stats_collector_ != nullptr && !SetTimelineLabel(node, stats)) { // Only record non-transfer nodes. // Transfers 'stats' ownership to 'stats_collector_'. stats_collector_->Save(impl_->params_.device->name(), stats); -- GitLab From 549e651106e1e582dad0e8a6ea57b8f59ce95067 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 6 Oct 2017 11:03:06 -0700 Subject: [PATCH 120/909] [TF2XLA] Update device name in convert and redo check that name parsing is correct. * Update ConvertGraphToXla to use the new form for setting the assigned device name. * Remove some stale comments. * Revert workaround that allowed the requested device name to not be parsed. 
PiperOrigin-RevId: 171314671 --- tensorflow/compiler/xla/client/client.cc | 1 - .../xla/client/computation_builder.cc | 13 ---- .../compiler/xla/client/computation_builder.h | 10 --- .../compiler/xla/service/shape_inference.cc | 9 +-- .../compiler/xla/tests/literal_test_util.cc | 73 +++++-------------- .../compiler/xla/tests/literal_test_util.h | 2 - tensorflow/compiler/xla/tests/reshape_test.cc | 18 +---- .../contrib/all_reduce/python/all_reduce.py | 4 +- tensorflow/core/common_runtime/executor.cc | 2 +- 9 files changed, 23 insertions(+), 109 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index 7db2ea79fb..387253617e 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -206,7 +206,6 @@ StatusOr> Client::Execute( *request.mutable_execution_options() = *execution_options; } for (GlobalData* argument : arguments) { - CHECK(argument != nullptr) << "Argument pointers must not be null."; *request.add_arguments() = argument->handle(); } diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 925dcd36c0..15a713513f 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -489,16 +489,6 @@ ComputationDataHandle ComputationBuilder::Collapse( } std::unique_ptr original_shape = shape_or_status.ConsumeValueOrDie(); - VLOG(3) << "original shape: " << ShapeUtil::HumanString(*original_shape); - VLOG(3) << "dims to collapse: " - << tensorflow::str_util::Join(dims_to_collapse, ","); - - if (dims_to_collapse.size() <= 1) { - // Not collapsing anything, trivially we can return the operand versus - // enqueueing a trivial reshape. 
- return operand; - } - std::vector new_sizes; for (int i = 0; i < ShapeUtil::Rank(*original_shape); ++i) { if (i <= dims_to_collapse.front() || i > dims_to_collapse.back()) { @@ -508,9 +498,6 @@ ComputationDataHandle ComputationBuilder::Collapse( } } - VLOG(3) << "new sizes: [" << tensorflow::str_util::Join(new_sizes, ",") - << "]"; - return Reshape(operand, new_sizes); } diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 7014685ea5..73972c1290 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -201,16 +201,6 @@ class ComputationBuilder { // {x=1024, y=32} by collapsing dims {0, 1, 2}. Collapsing dimensions must // be a consecutive, in-order subsequence of the operand dimensions. // - // Note that collapsing a single dimension does nothing: - // - // {256} collapsing {0} => {256} - // {1} collapsing {0} => {1} - // - // Collapsing multiple dimensions produces a single result dimension: - // - // {256, 2} collapsing {0,1} => {512} - // {256, 2, 3} collapsing {0,1} => {512, 3} - // // This could potentially cause data to be moved -- it provides a more // structured form of reshaping than an arbitrary Reshape operation. 
ComputationDataHandle Collapse(const ComputationDataHandle& operand, diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 29221d2d29..ffd8018827 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1894,16 +1894,11 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( Shape inferred_shape = ShapeUtil::MakeShape(operand.element_type(), new_sizes); - VLOG(3) << "Reshape inferred shape: " - << ShapeUtil::HumanString(inferred_shape); if (ShapeUtil::ElementsIn(operand) != ShapeUtil::ElementsIn(inferred_shape)) { return InvalidArgument( - "reshape operation has mismatched element counts: from=%lld (%s) " - "to=%lld (%s)", - ShapeUtil::ElementsIn(operand), ShapeUtil::HumanString(operand).c_str(), - ShapeUtil::ElementsIn(inferred_shape), - ShapeUtil::HumanString(inferred_shape).c_str()); + "reshape operation has mismatched element counts: from=%lld to=%lld", + ShapeUtil::ElementsIn(operand), ShapeUtil::ElementsIn(inferred_shape)); } std::vector indices(ShapeUtil::Rank(operand)); diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 2876a79dd8..061a4e190f 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -39,60 +39,30 @@ limitations under the License. namespace xla { -/* static */ ::testing::AssertionResult LiteralTestUtil::EqualShapes( - const Shape& expected, const Shape& actual) { - if (ShapeUtil::IsTuple(expected) != ShapeUtil::IsTuple(actual)) { - return ::testing::AssertionFailure() - << "tupleness-mismatch! 
want: " << ShapeUtil::HumanString(expected) - << " got: " << ShapeUtil::HumanString(actual); - } +/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, + const Shape& actual) { + ASSERT_EQ(ShapeUtil::IsTuple(expected), ShapeUtil::IsTuple(actual)); if (ShapeUtil::IsTuple(expected)) { - if (ShapeUtil::TupleElementCount(expected) != - ShapeUtil::TupleElementCount(actual)) { - return ::testing::AssertionFailure() - << "want tuple element count: " - << ShapeUtil::TupleElementCount(expected) - << " got tuple element count: " - << ShapeUtil::TupleElementCount(actual); - } + ASSERT_EQ(ShapeUtil::TupleElementCount(expected), + ShapeUtil::TupleElementCount(actual)); for (int i = 0; i < expected.tuple_shapes_size(); ++i) { - ::testing::AssertionResult result = - EqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); - if (!result) { - return result; - } + AssertEqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); } } else { - if (ShapeUtil::Rank(expected) != ShapeUtil::Rank(actual)) { - return ::testing::AssertionFailure() - << "want rank of: " << ShapeUtil::HumanString(expected) - << " got rank of: " << ShapeUtil::HumanString(actual); - } - if (expected.element_type() != actual.element_type()) { - return ::testing::AssertionFailure() - << PrimitiveType_Name(expected.element_type()) << " vs " - << PrimitiveType_Name(actual.element_type()); - } - if (expected.dimensions_size() != actual.dimensions_size()) { - return ::testing::AssertionFailure() - << "want dimensions_size " << expected.dimensions_size() - << " got dimensions_size " << actual.dimensions_size(); - } + ASSERT_EQ(ShapeUtil::Rank(expected), ShapeUtil::Rank(actual)) + << "want rank of: " << ShapeUtil::HumanString(expected) + << " got rank of: " << ShapeUtil::HumanString(actual); + ASSERT_EQ(expected.element_type(), actual.element_type()) + << PrimitiveType_Name(expected.element_type()) << " vs " + << PrimitiveType_Name(actual.element_type()); + 
ASSERT_EQ(expected.dimensions_size(), actual.dimensions_size()); for (int i = 0; i < expected.dimensions_size(); ++i) { - if (expected.dimensions(i) != actual.dimensions(i)) { - return ::testing::AssertionFailure() - << "mismatch in dimension #" << i - << " expected: " << ShapeUtil::HumanString(expected) - << " actual: " << ShapeUtil::HumanString(actual); - } + ASSERT_EQ(expected.dimensions(i), actual.dimensions(i)) + << "mismatch in dimension #" << i + << " expected: " << ShapeUtil::HumanString(expected) + << " actual: " << ShapeUtil::HumanString(actual); } } - return ::testing::AssertionSuccess(); -} - -/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, - const Shape& actual) { - ASSERT_TRUE(EqualShapes(expected, actual)); } /* static */ void LiteralTestUtil::AssertEqualShapesAndLayouts( @@ -295,14 +265,7 @@ class NearComparator { VLOG(1) << "actual:"; XLA_VLOG_LINES(1, actual.ToString()); - // If the shapes mismatch, we simply fail the expectation instead of - // printing out data, as it's a type error rather than a value error. - ::testing::AssertionResult equal_shapes = - LiteralTestUtil::EqualShapes(expected.shape(), actual.shape()); - if (!equal_shapes) { - EXPECT_TRUE(equal_shapes); - return false; - } + LiteralTestUtil::AssertEqualShapes(expected.shape(), actual.shape()); // Set up members used during the comparison. num_miscompares_ = 0; diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h index 467d44b857..f645c4e8dc 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.h +++ b/tensorflow/compiler/xla/tests/literal_test_util.h @@ -50,8 +50,6 @@ class LiteralTestUtil { public: // Asserts that the given shapes have the same rank, dimension sizes, and // primitive types. 
- static ::testing::AssertionResult EqualShapes(const Shape& expected, - const Shape& actual); static void AssertEqualShapes(const Shape& expected, const Shape& actual); // Asserts that the provided shapes are equal as defined in AssertEqualShapes diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc index 72c68f24a0..bb7160e3a0 100644 --- a/tensorflow/compiler/xla/tests/reshape_test.cc +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -47,7 +47,7 @@ class ReshapeTest : public ClientLibraryTestBase { }; // Collapses 2-dimensional pseudo-scalar (single-element array) to 1 dimension. -XLA_TEST_F(ReshapeTest, CollapseTrivial1x1) { +XLA_TEST_F(ReshapeTest, Trivial1x1) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR2({{1.0}}); builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1}); @@ -55,22 +55,6 @@ XLA_TEST_F(ReshapeTest, CollapseTrivial1x1) { ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); } -XLA_TEST_F(ReshapeTest, CollapseTrivialR1EmptyDims) { - ComputationBuilder builder(client_, TestName()); - auto a = builder.ConstantR1({1.0}); - builder.Collapse(/*operand=*/a, /*dimensions=*/{}); - - ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); -} - -XLA_TEST_F(ReshapeTest, CollapseTrivialR1OnlyDim) { - ComputationBuilder builder(client_, TestName()); - auto a = builder.ConstantR1({1.0}); - builder.Collapse(/*operand=*/a, /*dimensions=*/{0}); - - ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); -} - // Collapses 2-dimensional pseudo-scalar (single-element array) to scalar. 
XLA_TEST_F(ReshapeTest, SingleElementArrayToScalar) { ComputationBuilder builder(client_, TestName()); diff --git a/tensorflow/contrib/all_reduce/python/all_reduce.py b/tensorflow/contrib/all_reduce/python/all_reduce.py index 22d7633ce2..8e7f1791b8 100644 --- a/tensorflow/contrib/all_reduce/python/all_reduce.py +++ b/tensorflow/contrib/all_reduce/python/all_reduce.py @@ -762,8 +762,6 @@ def _reduce_non_singleton(input_tensors, red_f, un_op): if len(input_tensors) > 1: return red_f(input_tensors) else: - if not un_op: - return input_tensors output_tensors = [] for t in input_tensors: with ops.colocate_with(t): @@ -837,7 +835,7 @@ def _build_shuffle_hybrid(input_tensors, gather_devices, red_op, upper_level_f): def build_shuffle_then_ring(input_tensors, gather_devices, subdiv, - red_n_op, red_op, un_op=None): + red_n_op, red_op, un_op): """Construct hybrid of Shuffle within workers, Ring across workers.""" def upper_builder(tensors): return build_ring_all_reduce(tensors, len(tensors), subdiv, [0], diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 11e063d8d2..f57834cfbe 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -2008,7 +2008,7 @@ bool ExecutorState::NodeDone(const Status& s, const Node* node, NodeExecStatsWrapper* stats, TaggedNodeReadyQueue* inline_ready) { nodestats::SetAllEnd(stats); - if (stats_collector_ != nullptr && !SetTimelineLabel(node, stats)) { + if (!SetTimelineLabel(node, stats)) { // Only record non-transfer nodes. // Transfers 'stats' ownership to 'stats_collector_'. stats_collector_->Save(impl_->params_.device->name(), stats); -- GitLab From 84b579e1d14760fc2a313c8e1d7ca100f74945a1 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 6 Oct 2017 11:34:03 -0700 Subject: [PATCH 121/909] [XLA:CPU] Make EmitTargetAddressForOp return void (well, technically Status). 
This is a general cleanup -- less repeated code -- but it's also part of an effort to use IrArray more and llvm::Value less. In particular, many callsites would take the llvm::Value returned by EmitTargetAddressForOp and create an IrArray out of it, but then never attach AA info to that array. Having this function return void forces you to call GetIrArrayForOp(), which attaches the AA metadata appropriately. This change also gets rid of an unused arg to EmitTargetAddressForOp. PiperOrigin-RevId: 171320201 --- tensorflow/compiler/xla/client/client.cc | 1 + .../xla/client/computation_builder.cc | 13 + .../compiler/xla/client/computation_builder.h | 10 + .../compiler/xla/service/cpu/ir_emitter.cc | 242 ++++++------------ .../compiler/xla/service/cpu/ir_emitter.h | 9 +- .../compiler/xla/service/shape_inference.cc | 9 +- .../compiler/xla/tests/literal_test_util.cc | 73 ++++-- .../compiler/xla/tests/literal_test_util.h | 2 + tensorflow/compiler/xla/tests/reshape_test.cc | 18 +- .../contrib/all_reduce/python/all_reduce.py | 4 +- tensorflow/core/common_runtime/executor.cc | 2 +- 11 files changed, 195 insertions(+), 188 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index 387253617e..7db2ea79fb 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -206,6 +206,7 @@ StatusOr> Client::Execute( *request.mutable_execution_options() = *execution_options; } for (GlobalData* argument : arguments) { + CHECK(argument != nullptr) << "Argument pointers must not be null."; *request.add_arguments() = argument->handle(); } diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 15a713513f..925dcd36c0 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -489,6 +489,16 @@ ComputationDataHandle ComputationBuilder::Collapse( } 
std::unique_ptr original_shape = shape_or_status.ConsumeValueOrDie(); + VLOG(3) << "original shape: " << ShapeUtil::HumanString(*original_shape); + VLOG(3) << "dims to collapse: " + << tensorflow::str_util::Join(dims_to_collapse, ","); + + if (dims_to_collapse.size() <= 1) { + // Not collapsing anything, trivially we can return the operand versus + // enqueueing a trivial reshape. + return operand; + } + std::vector new_sizes; for (int i = 0; i < ShapeUtil::Rank(*original_shape); ++i) { if (i <= dims_to_collapse.front() || i > dims_to_collapse.back()) { @@ -498,6 +508,9 @@ ComputationDataHandle ComputationBuilder::Collapse( } } + VLOG(3) << "new sizes: [" << tensorflow::str_util::Join(new_sizes, ",") + << "]"; + return Reshape(operand, new_sizes); } diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 73972c1290..7014685ea5 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -201,6 +201,16 @@ class ComputationBuilder { // {x=1024, y=32} by collapsing dims {0, 1, 2}. Collapsing dimensions must // be a consecutive, in-order subsequence of the operand dimensions. // + // Note that collapsing a single dimension does nothing: + // + // {256} collapsing {0} => {256} + // {1} collapsing {0} => {1} + // + // Collapsing multiple dimensions produces a single result dimension: + // + // {256, 2} collapsing {0,1} => {512} + // {256, 2, 3} collapsing {0,1} => {512, 3} + // // This could potentially cause data to be moved -- it provides a more // structured form of reshaping than an arbitrary Reshape operation. 
ComputationDataHandle Collapse(const ComputationDataHandle& operand, diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 4375f13a0e..e4fb7c0496 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -291,8 +291,7 @@ Status IrEmitter::HandleConstant(HloInstruction* constant, Status IrEmitter::HandleCopy(HloInstruction* copy) { if (ShapeUtil::IsTuple(copy->shape())) { // kCopy shallow copies a tuple so just memcpy the top-level buffer. - TF_ASSIGN_OR_RETURN(llvm::Value * copy_value, EmitTargetAddressForOp(copy)); - emitted_value_[copy] = copy_value; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(copy)); return EmitMemcpy(*(copy->operand(0)), *copy); } else { // Use the elemental emitter for non-tuple shapes. @@ -395,9 +394,7 @@ Status IrEmitter::HandleSelect(HloInstruction* select, HloInstruction* pred, TF_RET_CHECK(pred->shape().element_type() == PRED); if (ShapeUtil::IsTuple(select->shape())) { - TF_ASSIGN_OR_RETURN(llvm::Value * output_address, - EmitTargetAddressForOp(select)); - emitted_value_[select] = output_address; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(select)); llvm_ir::EmitTupleSelect(GetIrArrayForOp(select), GetIrArrayForOp(pred), GetEmittedValueFor(on_true), GetEmittedValueFor(on_false), &ir_builder_); @@ -414,8 +411,8 @@ Status IrEmitter::HandleInfeed(HloInstruction* infeed) { // The infeed operation produces data (dequeued from the infeed queue) at this // address, which has been provided by buffer assignment. 
- TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(infeed)); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(infeed)); + llvm_ir::IrArray infeed_array = GetIrArrayForOp(infeed); if (ShapeUtil::IsTuple(shape)) { TF_RET_CHECK(!ShapeUtil::IsNestedTuple(shape)); @@ -433,9 +430,9 @@ Status IrEmitter::HandleInfeed(HloInstruction* infeed) { ShapeUtil::GetTupleElementShape(shape, i); // Only the outer tuple buffer's target address is obtained from - // EmitTargetAddressForOp to handle the case when Infeed is the - // root instruction. Target addresses for internal elements can - // be obtained from EmitTempBufferPointer. + // GetEmittedValueFor, to handle the case when Infeed is the root + // instruction. Target addresses for internal elements can be obtained + // from EmitTempBufferPointer. llvm::Value* tuple_element_address = EmitTempBufferPointer(buffer, tuple_element_shape); @@ -445,15 +442,12 @@ Status IrEmitter::HandleInfeed(HloInstruction* infeed) { tuple_element_addresses.push_back(tuple_element_address); } - llvm_ir::EmitTuple(llvm_ir::IrArray(target_address, shape), - tuple_element_addresses, &ir_builder_); + llvm_ir::EmitTuple(infeed_array, tuple_element_addresses, &ir_builder_); } else { - TF_RETURN_IF_ERROR( - EmitXfeedTransfer(XfeedKind::kInfeed, shape, target_address)); + TF_RETURN_IF_ERROR(EmitXfeedTransfer(XfeedKind::kInfeed, shape, + GetEmittedValueFor(infeed))); } - emitted_value_[infeed] = target_address; - return Status::OK(); } @@ -567,15 +561,12 @@ Status IrEmitter::HandleSort(HloInstruction* sort, HloInstruction* operand) { Status IrEmitter::HandleTuple( HloInstruction* tuple, tensorflow::gtl::ArraySlice operands) { - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(tuple)); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(tuple)); std::vector base_ptrs; for (auto operand : operands) { base_ptrs.push_back(GetEmittedValueFor(operand)); } - llvm_ir::EmitTuple(llvm_ir::IrArray(target_address, tuple->shape()), 
- base_ptrs, &ir_builder_); - emitted_value_[tuple] = target_address; + llvm_ir::EmitTuple(GetIrArrayForOp(tuple), base_ptrs, &ir_builder_); return Status::OK(); } @@ -892,11 +883,8 @@ Status IrEmitter::HandleDot(HloInstruction* dot, HloInstruction* lhs, llvm_ir::IrArray lhs_array(GetIrArrayForOp(lhs)); llvm_ir::IrArray rhs_array(GetIrArrayForOp(rhs)); - Shape target_shape = dot->shape(); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(dot)); - llvm_ir::IrArray target_array(target_address, target_shape); - AddAliasingInformationToIrArray(*dot, &target_array); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dot)); + llvm_ir::IrArray target_array = GetIrArrayForOp(dot); VLOG(2) << "HandleDot: "; VLOG(2) << " lhs operand: " @@ -907,13 +895,10 @@ Status IrEmitter::HandleDot(HloInstruction* dot, HloInstruction* lhs, << llvm_ir::DumpToString(*target_array.GetBasePointer()); // Dot operation is complicated so we delegate to a helper class. - TF_RETURN_IF_ERROR(DotOpEmitter::EmitDotOperation( + return DotOpEmitter::EmitDotOperation( *dot, /*transpose_lhs=*/false, /*transpose_rhs=*/false, target_array, lhs_array, rhs_array, GetExecutableRunOptionsArgument(), &ir_builder_, - hlo_module_config_)); - - emitted_value_[dot] = target_address; - return Status::OK(); + hlo_module_config_); } Status IrEmitter::HandleConvolution(HloInstruction* convolution, @@ -941,8 +926,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution, bool one_dim_convolution = lhs_shape.dimensions_size() == 3; llvm::Value* lhs_address = GetEmittedValueFor(lhs); llvm::Value* rhs_address = GetEmittedValueFor(rhs); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(convolution)); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(convolution)); const ConvolutionDimensionNumbers& dnums = convolution->convolution_dimension_numbers(); @@ -1024,35 +1008,33 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution, conv_func->setDoesNotThrow(); 
conv_func->setOnlyAccessesArgMemory(); ir_builder_.CreateCall( - conv_func, - { - GetExecutableRunOptionsArgument(), - ir_builder_.CreateBitCast(target_address, float_ptr_type), - ir_builder_.CreateBitCast(lhs_address, float_ptr_type), - ir_builder_.CreateBitCast(rhs_address, float_ptr_type), - ir_builder_.getInt64(input_batch), - ir_builder_.getInt64(input_rows), - ir_builder_.getInt64(input_cols), - ir_builder_.getInt64(input_channels), - ir_builder_.getInt64(kernel_rows), - ir_builder_.getInt64(kernel_cols), - ir_builder_.getInt64(kernel_channels), - ir_builder_.getInt64(kernel_filters), - ir_builder_.getInt64(output_rows), - ir_builder_.getInt64(output_cols), - ir_builder_.getInt64(row_stride), - ir_builder_.getInt64(col_stride), - ir_builder_.getInt64(padding_top), - ir_builder_.getInt64(padding_bottom), - ir_builder_.getInt64(padding_left), - ir_builder_.getInt64(padding_right), - ir_builder_.getInt64(lhs_row_dilation), - ir_builder_.getInt64(lhs_col_dilation), - ir_builder_.getInt64(rhs_row_dilation), - ir_builder_.getInt64(rhs_col_dilation), - }); - target_address->setName(AsStringRef(IrName(convolution))); - emitted_value_[convolution] = target_address; + conv_func, { + GetExecutableRunOptionsArgument(), + ir_builder_.CreateBitCast( + GetEmittedValueFor(convolution), float_ptr_type), + ir_builder_.CreateBitCast(lhs_address, float_ptr_type), + ir_builder_.CreateBitCast(rhs_address, float_ptr_type), + ir_builder_.getInt64(input_batch), + ir_builder_.getInt64(input_rows), + ir_builder_.getInt64(input_cols), + ir_builder_.getInt64(input_channels), + ir_builder_.getInt64(kernel_rows), + ir_builder_.getInt64(kernel_cols), + ir_builder_.getInt64(kernel_channels), + ir_builder_.getInt64(kernel_filters), + ir_builder_.getInt64(output_rows), + ir_builder_.getInt64(output_cols), + ir_builder_.getInt64(row_stride), + ir_builder_.getInt64(col_stride), + ir_builder_.getInt64(padding_top), + ir_builder_.getInt64(padding_bottom), + ir_builder_.getInt64(padding_left), + 
ir_builder_.getInt64(padding_right), + ir_builder_.getInt64(lhs_row_dilation), + ir_builder_.getInt64(lhs_col_dilation), + ir_builder_.getInt64(rhs_row_dilation), + ir_builder_.getInt64(rhs_col_dilation), + }); return Status::OK(); } @@ -1367,9 +1349,7 @@ Status IrEmitter::HandleBatchNormTraining(HloInstruction* batch_norm_training) { mean_array, &ir_builder_) .EmitLoop(IrName(batch_norm_training, "mean_var"))); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(batch_norm_training)); - + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(batch_norm_training)); TF_ASSIGN_OR_RETURN( const BufferAllocation::Slice slice, assignment_.GetUniqueSlice(batch_norm_training, /*index=*/{0})); @@ -1425,11 +1405,8 @@ Status IrEmitter::HandleBatchNormTraining(HloInstruction* batch_norm_training) { target_array, &ir_builder_) .EmitLoop(IrName(batch_norm_training, "normalize"))); - llvm_ir::EmitTuple( - llvm_ir::IrArray(target_address, batch_norm_training->shape()), - {normalized, mean, var}, &ir_builder_); - emitted_value_[batch_norm_training] = target_address; - + llvm_ir::EmitTuple(GetIrArrayForOp(batch_norm_training), + {normalized, mean, var}, &ir_builder_); return Status::OK(); } @@ -1789,6 +1766,7 @@ StatusOr IrEmitter::EmitVectorizedReduce( } CHECK(!ShapeUtil::IsTuple(reduce->shape())); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(reduce)); // We know we're not reducing over the most minor dimension, which means we // can lower the reduction loop as: @@ -1851,10 +1829,7 @@ StatusOr IrEmitter::EmitVectorizedReduce( reduction_generator, array_index, vector_type, init_value, arg, dimensions, element_alignment)); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(reduce)); - llvm_ir::IrArray target_array(target_address, reduce->shape()); - AddAliasingInformationToIrArray(*reduce, &target_array); + llvm_ir::IrArray target_array = GetIrArrayForOp(reduce); llvm::Value* output_address = target_array.EmitArrayElementAddress(array_index, 
&ir_builder_); EmitShardedVectorStore(output_address, accumulator, element_alignment, @@ -1886,10 +1861,7 @@ StatusOr IrEmitter::EmitVectorizedReduce( reduction_generator, array_index, vector_type, init_value, arg, dimensions, element_alignment)); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(reduce)); - llvm_ir::IrArray target_array(target_address, reduce->shape()); - AddAliasingInformationToIrArray(*reduce, &target_array); + llvm_ir::IrArray target_array = GetIrArrayForOp(reduce); llvm::Value* output_address = target_array.EmitArrayElementAddress(array_index, &ir_builder_); EmitShardedVectorStore(output_address, accumulator, element_alignment, @@ -1900,10 +1872,6 @@ StatusOr IrEmitter::EmitVectorizedReduce( ir_builder_.SetInsertPoint(outermost_loop_exit_block); } - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(reduce)); - - emitted_value_[reduce] = target_address; return true; } @@ -2003,9 +1971,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice, HloInstruction* operand) { return DefaultAction(slice); } - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(slice)); - emitted_value_[slice] = target_address; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(slice)); if (ShapeUtil::HasZeroElements(slice->shape())) { return Status::OK(); @@ -2077,8 +2043,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice, HloInstruction* operand) { outer_dims.push_back(memcpy_dim); } - llvm_ir::IrArray target_array(target_address, slice->shape()); - AddAliasingInformationToIrArray(*slice, &target_array); + llvm_ir::IrArray target_array = GetIrArrayForOp(slice); const int64 num_outer_loops = outer_dims.size(); llvm_ir::ForLoopNest loops(IrName(slice), &ir_builder_); @@ -2131,10 +2096,7 @@ Status IrEmitter::HandleDynamicSlice(HloInstruction* dynamic_slice, HloInstruction* operand, HloInstruction* /*start_indices*/) { if (ShapeUtil::IsScalar(dynamic_slice->shape())) { - 
TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(dynamic_slice)); - target_address->setName(AsStringRef(IrName(dynamic_slice))); - emitted_value_[dynamic_slice] = target_address; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dynamic_slice)); return EmitMemcpy(*operand, *dynamic_slice); } return DefaultAction(dynamic_slice); @@ -2190,10 +2152,7 @@ Status IrEmitter::HandleDynamicUpdateSlice(HloInstruction* dynamic_update_slice, HloInstruction* update, HloInstruction* start_indices) { if (ShapeUtil::IsScalar(dynamic_update_slice->shape())) { - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(dynamic_update_slice)); - target_address->setName(AsStringRef(IrName(dynamic_update_slice))); - emitted_value_[dynamic_update_slice] = target_address; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dynamic_update_slice)); return EmitMemcpy(*update, *dynamic_update_slice); } else if (CanUpdateDynamicSliceInPlace(assignment_, dynamic_update_slice)) { VLOG(2) << "Emitting HandleDynamicUpdateSlice in-place."; @@ -2247,9 +2206,7 @@ Status IrEmitter::HandleDynamicUpdateSlice(HloInstruction* dynamic_update_slice, llvm_ir::LoopEmitter(loop_body_emitter, update->shape(), &ir_builder_) .EmitLoop(IrName(dynamic_update_slice, "in_place"))); - TF_ASSIGN_OR_RETURN(llvm::Value * dynamic_update_slice_address, - EmitTargetAddressForOp(dynamic_update_slice)); - emitted_value_[dynamic_update_slice] = dynamic_update_slice_address; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dynamic_update_slice)); return Status::OK(); } return DefaultAction(dynamic_update_slice); @@ -2348,11 +2305,8 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { llvm_ir::IrArray rhs_array(GetIrArrayForOp(rhs)); Shape target_shape = fusion->shape(); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(fusion)); - llvm_ir::IrArray target_array(target_address, target_shape); - AddAliasingInformationToIrArray(*fusion, &target_array); - + 
TF_RETURN_IF_ERROR(EmitTargetAddressForOp(fusion)); + llvm_ir::IrArray target_array = GetIrArrayForOp(fusion); VLOG(2) << "HandleFusion kTransposeDot: "; VLOG(2) << " lhs operand: " << llvm_ir::DumpToString(*lhs_array.GetBasePointer()); @@ -2366,8 +2320,6 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { *dot, dot->operand(0)->IsRank2Transpose(), dot->operand(1)->IsRank2Transpose(), target_array, lhs_array, rhs_array, GetExecutableRunOptionsArgument(), &ir_builder_, hlo_module_config_)); - - emitted_value_[fusion] = target_address; return Status::OK(); } else if (fusion->fusion_kind() == HloInstruction::FusionKind::kLoop) { std::vector parameter_arrays; @@ -2393,14 +2345,9 @@ Status IrEmitter::HandleCall(HloInstruction* call) { parameter_addresses.push_back(GetEmittedValueFor(operand)); } - TF_ASSIGN_OR_RETURN(llvm::Value * output_address, - EmitTargetAddressForOp(call)); - output_address->setName(AsStringRef(IrName(call))); - + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(call)); EmitArrayFunctionCallInto(call_ir_function, parameter_addresses, - output_address, computation->name()); - - emitted_value_[call] = output_address; + emitted_value_[call], computation->name()); return Status::OK(); } @@ -2429,17 +2376,13 @@ Status IrEmitter::HandleCustomCall( /*Params=*/{i8_ptr_type, operands_alloca->getType()}, /*isVarArg=*/false))); - TF_ASSIGN_OR_RETURN(llvm::Value * output_address, - EmitTargetAddressForOp(custom_call)); - output_address->setName(AsStringRef(IrName(custom_call))); - - auto* output_address_arg = - ir_builder_.CreatePointerCast(output_address, i8_ptr_type); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(custom_call)); + auto* output_address_arg = ir_builder_.CreatePointerCast( + GetEmittedValueFor(custom_call), i8_ptr_type); ir_builder_.CreateCall(custom_call_ir_function, {output_address_arg, operands_alloca}); - emitted_value_[custom_call] = output_address; return Status::OK(); } @@ -2583,10 +2526,8 @@ StatusOr IrEmitter::EmitFastConcatenate( 
llvm::Type* i8_ptr_type = ir_builder_.getInt8PtrTy(); llvm::Type* i8_type = ir_builder_.getInt8Ty(); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(concatenate)); - - llvm_ir::IrArray target_array(target_address, output_shape); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(concatenate)); + llvm_ir::IrArray target_array = GetIrArrayForOp(concatenate); llvm_ir::ForLoopNest loops(IrName(concatenate), &ir_builder_); llvm_ir::IrArray::Index outer_dims_index = @@ -2603,8 +2544,6 @@ StatusOr IrEmitter::EmitFastConcatenate( unsigned primitive_type_size = ShapeUtil::ByteSizeOfPrimitiveType(primitive_type); - AddAliasingInformationToIrArray(*concatenate, &target_array); - // Contiguous subregions from each operand to the concatenate contribute to a // contiguous subregion in the target buffer starting at target_region_begin. llvm::Value* target_region_begin = ir_builder_.CreateBitCast( @@ -2647,8 +2586,6 @@ StatusOr IrEmitter::EmitFastConcatenate( SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), &ir_builder_); } - emitted_value_[concatenate] = target_address; - return true; } @@ -2842,15 +2779,6 @@ Status IrEmitter::Preprocess(HloInstruction* hlo) { } Status IrEmitter::Postprocess(HloInstruction* hlo) { - // Set the name of the emitted llvm::Value to IrName(hlo). Outfeed and send - // the only ops that don't emit a value. 
- if (hlo->opcode() != HloOpcode::kOutfeed && - hlo->opcode() != HloOpcode::kSend) { - auto it = emitted_value_.find(hlo); - CHECK(it != emitted_value_.end()); - it->second->setName(AsStringRef(IrName(hlo))); - } - if (auto* prof_counter = GetProfileCounterFor(hlo)) { profiling_state_.RecordCycleDelta(&ir_builder_, hlo, prof_counter); } @@ -3027,10 +2955,10 @@ llvm::Value* IrEmitter::EmitArrayFunctionCall( return return_value_buffer; } -StatusOr IrEmitter::EmitTargetAddressForOp( - const HloInstruction* op, const ShapeIndex& shape_index) { - const Shape& target_shape = ShapeUtil::GetSubshape(op->shape(), shape_index); - if (op == op->parent()->root_instruction() && shape_index.empty()) { +Status IrEmitter::EmitTargetAddressForOp(const HloInstruction* op) { + llvm::Value* addr; + const Shape& target_shape = op->shape(); + if (op == op->parent()->root_instruction()) { // For the root node, we write directly to the output buffer of the // function. llvm::Argument* retval = GetResultArgument(); @@ -3040,15 +2968,18 @@ StatusOr IrEmitter::EmitTargetAddressForOp( attr_builder.addDereferenceableAttr(ByteSizeOf(target_shape)); retval->addAttrs(attr_builder); } - return ir_builder_.CreateBitCast(retval, + addr = ir_builder_.CreateBitCast(retval, IrShapeType(target_shape)->getPointerTo()); - } - - // For other nodes, we need the temporary buffer allocated for this node to - // write the result into. - TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice, - assignment_.GetUniqueTopLevelSlice(op)); - return EmitTempBufferPointer(slice, target_shape); + } else { + // For other nodes, we need the temporary buffer allocated for this node to + // write the result into. 
+ TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice, + assignment_.GetUniqueTopLevelSlice(op)); + addr = EmitTempBufferPointer(slice, target_shape); + } + addr->setName(AsStringRef(IrName(op))); + emitted_value_[op] = addr; + return Status::OK(); } Status IrEmitter::EmitTargetElementLoop( @@ -3062,12 +2993,9 @@ Status IrEmitter::EmitTargetElementLoop( const llvm_ir::ElementGenerator& element_generator) { VLOG(2) << "EmitTargetElementLoop: " << target_op->ToString(); - // target_address will hold the address of the target buffer we will write the - // result of the computation into. const Shape& target_shape = target_op->shape(); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(target_op)); - VLOG(2) << " target address: " << llvm_ir::DumpToString(*target_address); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(target_op)); + llvm_ir::IrArray target_array = GetIrArrayForOp(target_op); if (target_op->IsMultiOutputFusion()) { // For multiple outputs fusion, we need to emit each operand and the root. 
@@ -3090,13 +3018,9 @@ Status IrEmitter::EmitTargetElementLoop( for (int64 i = 0; i < output_arrays.size(); ++i) { tuple_operand_ptrs.push_back(output_arrays[i].GetBasePointer()); } - llvm_ir::EmitTuple(llvm_ir::IrArray(target_address, target_shape), - tuple_operand_ptrs, &ir_builder_); + llvm_ir::EmitTuple(target_array, tuple_operand_ptrs, &ir_builder_); } else { - llvm_ir::IrArray target_array(target_address, target_shape); - AddAliasingInformationToIrArray(*target_op, &target_array); - if (ShouldEmitParallelLoopFor(*target_op)) { TF_RETURN_IF_ERROR(EmitParallelTargetElementLoop( target_shape, element_generator, IrName(target_op), &target_array)); @@ -3106,8 +3030,6 @@ Status IrEmitter::EmitTargetElementLoop( .EmitLoop(IrName(target_op))); } } - - emitted_value_[target_op] = target_address; return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 05663b6038..fd9ee71799 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -353,11 +353,10 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status EmitMemcpy(const HloInstruction& source, const HloInstruction& destination); - // Emit IR to compute the target address of the buffer for the given op. - // The returned Value is a pointer to a IR type that represents the op's - // element type. - StatusOr EmitTargetAddressForOp( - const HloInstruction* op, const ShapeIndex& shape_index = {}); + // Emits IR to compute the target address of the buffer for the given op. + // After calling this function, you can get a pointer to this buffer by + // calling GetIrArrayForOp or GetEmittedValueFor. + Status EmitTargetAddressForOp(const HloInstruction* op); // Structurizes "array_elements" into an MD array that represents "shape". 
// This is a recursive function, and "dimension_index" indicates the index of diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index ffd8018827..29221d2d29 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1894,11 +1894,16 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( Shape inferred_shape = ShapeUtil::MakeShape(operand.element_type(), new_sizes); + VLOG(3) << "Reshape inferred shape: " + << ShapeUtil::HumanString(inferred_shape); if (ShapeUtil::ElementsIn(operand) != ShapeUtil::ElementsIn(inferred_shape)) { return InvalidArgument( - "reshape operation has mismatched element counts: from=%lld to=%lld", - ShapeUtil::ElementsIn(operand), ShapeUtil::ElementsIn(inferred_shape)); + "reshape operation has mismatched element counts: from=%lld (%s) " + "to=%lld (%s)", + ShapeUtil::ElementsIn(operand), ShapeUtil::HumanString(operand).c_str(), + ShapeUtil::ElementsIn(inferred_shape), + ShapeUtil::HumanString(inferred_shape).c_str()); } std::vector indices(ShapeUtil::Rank(operand)); diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 061a4e190f..2876a79dd8 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -39,30 +39,60 @@ limitations under the License. namespace xla { -/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, - const Shape& actual) { - ASSERT_EQ(ShapeUtil::IsTuple(expected), ShapeUtil::IsTuple(actual)); +/* static */ ::testing::AssertionResult LiteralTestUtil::EqualShapes( + const Shape& expected, const Shape& actual) { + if (ShapeUtil::IsTuple(expected) != ShapeUtil::IsTuple(actual)) { + return ::testing::AssertionFailure() + << "tupleness-mismatch! 
want: " << ShapeUtil::HumanString(expected) + << " got: " << ShapeUtil::HumanString(actual); + } if (ShapeUtil::IsTuple(expected)) { - ASSERT_EQ(ShapeUtil::TupleElementCount(expected), - ShapeUtil::TupleElementCount(actual)); + if (ShapeUtil::TupleElementCount(expected) != + ShapeUtil::TupleElementCount(actual)) { + return ::testing::AssertionFailure() + << "want tuple element count: " + << ShapeUtil::TupleElementCount(expected) + << " got tuple element count: " + << ShapeUtil::TupleElementCount(actual); + } for (int i = 0; i < expected.tuple_shapes_size(); ++i) { - AssertEqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); + ::testing::AssertionResult result = + EqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); + if (!result) { + return result; + } } } else { - ASSERT_EQ(ShapeUtil::Rank(expected), ShapeUtil::Rank(actual)) - << "want rank of: " << ShapeUtil::HumanString(expected) - << " got rank of: " << ShapeUtil::HumanString(actual); - ASSERT_EQ(expected.element_type(), actual.element_type()) - << PrimitiveType_Name(expected.element_type()) << " vs " - << PrimitiveType_Name(actual.element_type()); - ASSERT_EQ(expected.dimensions_size(), actual.dimensions_size()); + if (ShapeUtil::Rank(expected) != ShapeUtil::Rank(actual)) { + return ::testing::AssertionFailure() + << "want rank of: " << ShapeUtil::HumanString(expected) + << " got rank of: " << ShapeUtil::HumanString(actual); + } + if (expected.element_type() != actual.element_type()) { + return ::testing::AssertionFailure() + << PrimitiveType_Name(expected.element_type()) << " vs " + << PrimitiveType_Name(actual.element_type()); + } + if (expected.dimensions_size() != actual.dimensions_size()) { + return ::testing::AssertionFailure() + << "want dimensions_size " << expected.dimensions_size() + << " got dimensions_size " << actual.dimensions_size(); + } for (int i = 0; i < expected.dimensions_size(); ++i) { - ASSERT_EQ(expected.dimensions(i), actual.dimensions(i)) - << "mismatch in dimension 
#" << i - << " expected: " << ShapeUtil::HumanString(expected) - << " actual: " << ShapeUtil::HumanString(actual); + if (expected.dimensions(i) != actual.dimensions(i)) { + return ::testing::AssertionFailure() + << "mismatch in dimension #" << i + << " expected: " << ShapeUtil::HumanString(expected) + << " actual: " << ShapeUtil::HumanString(actual); + } } } + return ::testing::AssertionSuccess(); +} + +/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, + const Shape& actual) { + ASSERT_TRUE(EqualShapes(expected, actual)); } /* static */ void LiteralTestUtil::AssertEqualShapesAndLayouts( @@ -265,7 +295,14 @@ class NearComparator { VLOG(1) << "actual:"; XLA_VLOG_LINES(1, actual.ToString()); - LiteralTestUtil::AssertEqualShapes(expected.shape(), actual.shape()); + // If the shapes mismatch, we simply fail the expectation instead of + // printing out data, as it's a type error rather than a value error. + ::testing::AssertionResult equal_shapes = + LiteralTestUtil::EqualShapes(expected.shape(), actual.shape()); + if (!equal_shapes) { + EXPECT_TRUE(equal_shapes); + return false; + } // Set up members used during the comparison. num_miscompares_ = 0; diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h index f645c4e8dc..467d44b857 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.h +++ b/tensorflow/compiler/xla/tests/literal_test_util.h @@ -50,6 +50,8 @@ class LiteralTestUtil { public: // Asserts that the given shapes have the same rank, dimension sizes, and // primitive types. 
+ static ::testing::AssertionResult EqualShapes(const Shape& expected, + const Shape& actual); static void AssertEqualShapes(const Shape& expected, const Shape& actual); // Asserts that the provided shapes are equal as defined in AssertEqualShapes diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc index bb7160e3a0..72c68f24a0 100644 --- a/tensorflow/compiler/xla/tests/reshape_test.cc +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -47,7 +47,7 @@ class ReshapeTest : public ClientLibraryTestBase { }; // Collapses 2-dimensional pseudo-scalar (single-element array) to 1 dimension. -XLA_TEST_F(ReshapeTest, Trivial1x1) { +XLA_TEST_F(ReshapeTest, CollapseTrivial1x1) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR2({{1.0}}); builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1}); @@ -55,6 +55,22 @@ XLA_TEST_F(ReshapeTest, Trivial1x1) { ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); } +XLA_TEST_F(ReshapeTest, CollapseTrivialR1EmptyDims) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({1.0}); + builder.Collapse(/*operand=*/a, /*dimensions=*/{}); + + ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); +} + +XLA_TEST_F(ReshapeTest, CollapseTrivialR1OnlyDim) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({1.0}); + builder.Collapse(/*operand=*/a, /*dimensions=*/{0}); + + ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); +} + // Collapses 2-dimensional pseudo-scalar (single-element array) to scalar. 
XLA_TEST_F(ReshapeTest, SingleElementArrayToScalar) { ComputationBuilder builder(client_, TestName()); diff --git a/tensorflow/contrib/all_reduce/python/all_reduce.py b/tensorflow/contrib/all_reduce/python/all_reduce.py index 8e7f1791b8..22d7633ce2 100644 --- a/tensorflow/contrib/all_reduce/python/all_reduce.py +++ b/tensorflow/contrib/all_reduce/python/all_reduce.py @@ -762,6 +762,8 @@ def _reduce_non_singleton(input_tensors, red_f, un_op): if len(input_tensors) > 1: return red_f(input_tensors) else: + if not un_op: + return input_tensors output_tensors = [] for t in input_tensors: with ops.colocate_with(t): @@ -835,7 +837,7 @@ def _build_shuffle_hybrid(input_tensors, gather_devices, red_op, upper_level_f): def build_shuffle_then_ring(input_tensors, gather_devices, subdiv, - red_n_op, red_op, un_op): + red_n_op, red_op, un_op=None): """Construct hybrid of Shuffle within workers, Ring across workers.""" def upper_builder(tensors): return build_ring_all_reduce(tensors, len(tensors), subdiv, [0], diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index f57834cfbe..11e063d8d2 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -2008,7 +2008,7 @@ bool ExecutorState::NodeDone(const Status& s, const Node* node, NodeExecStatsWrapper* stats, TaggedNodeReadyQueue* inline_ready) { nodestats::SetAllEnd(stats); - if (!SetTimelineLabel(node, stats)) { + if (stats_collector_ != nullptr && !SetTimelineLabel(node, stats)) { // Only record non-transfer nodes. // Transfers 'stats' ownership to 'stats_collector_'. 
stats_collector_->Save(impl_->params_.device->name(), stats); -- GitLab From af6e00f7c661c7d93bacfc3adc40d17f0faeb9b4 Mon Sep 17 00:00:00 2001 From: James Qin Date: Fri, 6 Oct 2017 11:04:52 -0700 Subject: [PATCH 122/909] Fix a minor issue w/ allreduce PiperOrigin-RevId: 171314944 --- tensorflow/compiler/xla/client/client.cc | 1 - .../xla/client/computation_builder.cc | 13 - .../compiler/xla/client/computation_builder.h | 10 - .../compiler/xla/service/cpu/ir_emitter.cc | 242 ++++++++++++------ .../compiler/xla/service/cpu/ir_emitter.h | 9 +- .../compiler/xla/service/shape_inference.cc | 9 +- .../compiler/xla/tests/literal_test_util.cc | 73 ++---- .../compiler/xla/tests/literal_test_util.h | 2 - tensorflow/compiler/xla/tests/reshape_test.cc | 18 +- tensorflow/core/common_runtime/executor.cc | 2 +- 10 files changed, 187 insertions(+), 192 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index 7db2ea79fb..387253617e 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -206,7 +206,6 @@ StatusOr> Client::Execute( *request.mutable_execution_options() = *execution_options; } for (GlobalData* argument : arguments) { - CHECK(argument != nullptr) << "Argument pointers must not be null."; *request.add_arguments() = argument->handle(); } diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 925dcd36c0..15a713513f 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -489,16 +489,6 @@ ComputationDataHandle ComputationBuilder::Collapse( } std::unique_ptr original_shape = shape_or_status.ConsumeValueOrDie(); - VLOG(3) << "original shape: " << ShapeUtil::HumanString(*original_shape); - VLOG(3) << "dims to collapse: " - << tensorflow::str_util::Join(dims_to_collapse, ","); - - if (dims_to_collapse.size() <= 1) { - // Not 
collapsing anything, trivially we can return the operand versus - // enqueueing a trivial reshape. - return operand; - } - std::vector new_sizes; for (int i = 0; i < ShapeUtil::Rank(*original_shape); ++i) { if (i <= dims_to_collapse.front() || i > dims_to_collapse.back()) { @@ -508,9 +498,6 @@ ComputationDataHandle ComputationBuilder::Collapse( } } - VLOG(3) << "new sizes: [" << tensorflow::str_util::Join(new_sizes, ",") - << "]"; - return Reshape(operand, new_sizes); } diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 7014685ea5..73972c1290 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -201,16 +201,6 @@ class ComputationBuilder { // {x=1024, y=32} by collapsing dims {0, 1, 2}. Collapsing dimensions must // be a consecutive, in-order subsequence of the operand dimensions. // - // Note that collapsing a single dimension does nothing: - // - // {256} collapsing {0} => {256} - // {1} collapsing {0} => {1} - // - // Collapsing multiple dimensions produces a single result dimension: - // - // {256, 2} collapsing {0,1} => {512} - // {256, 2, 3} collapsing {0,1} => {512, 3} - // // This could potentially cause data to be moved -- it provides a more // structured form of reshaping than an arbitrary Reshape operation. ComputationDataHandle Collapse(const ComputationDataHandle& operand, diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index e4fb7c0496..4375f13a0e 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -291,7 +291,8 @@ Status IrEmitter::HandleConstant(HloInstruction* constant, Status IrEmitter::HandleCopy(HloInstruction* copy) { if (ShapeUtil::IsTuple(copy->shape())) { // kCopy shallow copies a tuple so just memcpy the top-level buffer. 
- TF_RETURN_IF_ERROR(EmitTargetAddressForOp(copy)); + TF_ASSIGN_OR_RETURN(llvm::Value * copy_value, EmitTargetAddressForOp(copy)); + emitted_value_[copy] = copy_value; return EmitMemcpy(*(copy->operand(0)), *copy); } else { // Use the elemental emitter for non-tuple shapes. @@ -394,7 +395,9 @@ Status IrEmitter::HandleSelect(HloInstruction* select, HloInstruction* pred, TF_RET_CHECK(pred->shape().element_type() == PRED); if (ShapeUtil::IsTuple(select->shape())) { - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(select)); + TF_ASSIGN_OR_RETURN(llvm::Value * output_address, + EmitTargetAddressForOp(select)); + emitted_value_[select] = output_address; llvm_ir::EmitTupleSelect(GetIrArrayForOp(select), GetIrArrayForOp(pred), GetEmittedValueFor(on_true), GetEmittedValueFor(on_false), &ir_builder_); @@ -411,8 +414,8 @@ Status IrEmitter::HandleInfeed(HloInstruction* infeed) { // The infeed operation produces data (dequeued from the infeed queue) at this // address, which has been provided by buffer assignment. - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(infeed)); - llvm_ir::IrArray infeed_array = GetIrArrayForOp(infeed); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(infeed)); if (ShapeUtil::IsTuple(shape)) { TF_RET_CHECK(!ShapeUtil::IsNestedTuple(shape)); @@ -430,9 +433,9 @@ Status IrEmitter::HandleInfeed(HloInstruction* infeed) { ShapeUtil::GetTupleElementShape(shape, i); // Only the outer tuple buffer's target address is obtained from - // GetEmittedValueFor, to handle the case when Infeed is the root - // instruction. Target addresses for internal elements can be obtained - // from EmitTempBufferPointer. + // EmitTargetAddressForOp to handle the case when Infeed is the + // root instruction. Target addresses for internal elements can + // be obtained from EmitTempBufferPointer. 
llvm::Value* tuple_element_address = EmitTempBufferPointer(buffer, tuple_element_shape); @@ -442,12 +445,15 @@ Status IrEmitter::HandleInfeed(HloInstruction* infeed) { tuple_element_addresses.push_back(tuple_element_address); } - llvm_ir::EmitTuple(infeed_array, tuple_element_addresses, &ir_builder_); + llvm_ir::EmitTuple(llvm_ir::IrArray(target_address, shape), + tuple_element_addresses, &ir_builder_); } else { - TF_RETURN_IF_ERROR(EmitXfeedTransfer(XfeedKind::kInfeed, shape, - GetEmittedValueFor(infeed))); + TF_RETURN_IF_ERROR( + EmitXfeedTransfer(XfeedKind::kInfeed, shape, target_address)); } + emitted_value_[infeed] = target_address; + return Status::OK(); } @@ -561,12 +567,15 @@ Status IrEmitter::HandleSort(HloInstruction* sort, HloInstruction* operand) { Status IrEmitter::HandleTuple( HloInstruction* tuple, tensorflow::gtl::ArraySlice operands) { - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(tuple)); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(tuple)); std::vector base_ptrs; for (auto operand : operands) { base_ptrs.push_back(GetEmittedValueFor(operand)); } - llvm_ir::EmitTuple(GetIrArrayForOp(tuple), base_ptrs, &ir_builder_); + llvm_ir::EmitTuple(llvm_ir::IrArray(target_address, tuple->shape()), + base_ptrs, &ir_builder_); + emitted_value_[tuple] = target_address; return Status::OK(); } @@ -883,8 +892,11 @@ Status IrEmitter::HandleDot(HloInstruction* dot, HloInstruction* lhs, llvm_ir::IrArray lhs_array(GetIrArrayForOp(lhs)); llvm_ir::IrArray rhs_array(GetIrArrayForOp(rhs)); - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dot)); - llvm_ir::IrArray target_array = GetIrArrayForOp(dot); + Shape target_shape = dot->shape(); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(dot)); + llvm_ir::IrArray target_array(target_address, target_shape); + AddAliasingInformationToIrArray(*dot, &target_array); VLOG(2) << "HandleDot: "; VLOG(2) << " lhs operand: " @@ -895,10 +907,13 @@ Status 
IrEmitter::HandleDot(HloInstruction* dot, HloInstruction* lhs, << llvm_ir::DumpToString(*target_array.GetBasePointer()); // Dot operation is complicated so we delegate to a helper class. - return DotOpEmitter::EmitDotOperation( + TF_RETURN_IF_ERROR(DotOpEmitter::EmitDotOperation( *dot, /*transpose_lhs=*/false, /*transpose_rhs=*/false, target_array, lhs_array, rhs_array, GetExecutableRunOptionsArgument(), &ir_builder_, - hlo_module_config_); + hlo_module_config_)); + + emitted_value_[dot] = target_address; + return Status::OK(); } Status IrEmitter::HandleConvolution(HloInstruction* convolution, @@ -926,7 +941,8 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution, bool one_dim_convolution = lhs_shape.dimensions_size() == 3; llvm::Value* lhs_address = GetEmittedValueFor(lhs); llvm::Value* rhs_address = GetEmittedValueFor(rhs); - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(convolution)); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(convolution)); const ConvolutionDimensionNumbers& dnums = convolution->convolution_dimension_numbers(); @@ -1008,33 +1024,35 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution, conv_func->setDoesNotThrow(); conv_func->setOnlyAccessesArgMemory(); ir_builder_.CreateCall( - conv_func, { - GetExecutableRunOptionsArgument(), - ir_builder_.CreateBitCast( - GetEmittedValueFor(convolution), float_ptr_type), - ir_builder_.CreateBitCast(lhs_address, float_ptr_type), - ir_builder_.CreateBitCast(rhs_address, float_ptr_type), - ir_builder_.getInt64(input_batch), - ir_builder_.getInt64(input_rows), - ir_builder_.getInt64(input_cols), - ir_builder_.getInt64(input_channels), - ir_builder_.getInt64(kernel_rows), - ir_builder_.getInt64(kernel_cols), - ir_builder_.getInt64(kernel_channels), - ir_builder_.getInt64(kernel_filters), - ir_builder_.getInt64(output_rows), - ir_builder_.getInt64(output_cols), - ir_builder_.getInt64(row_stride), - ir_builder_.getInt64(col_stride), - 
ir_builder_.getInt64(padding_top), - ir_builder_.getInt64(padding_bottom), - ir_builder_.getInt64(padding_left), - ir_builder_.getInt64(padding_right), - ir_builder_.getInt64(lhs_row_dilation), - ir_builder_.getInt64(lhs_col_dilation), - ir_builder_.getInt64(rhs_row_dilation), - ir_builder_.getInt64(rhs_col_dilation), - }); + conv_func, + { + GetExecutableRunOptionsArgument(), + ir_builder_.CreateBitCast(target_address, float_ptr_type), + ir_builder_.CreateBitCast(lhs_address, float_ptr_type), + ir_builder_.CreateBitCast(rhs_address, float_ptr_type), + ir_builder_.getInt64(input_batch), + ir_builder_.getInt64(input_rows), + ir_builder_.getInt64(input_cols), + ir_builder_.getInt64(input_channels), + ir_builder_.getInt64(kernel_rows), + ir_builder_.getInt64(kernel_cols), + ir_builder_.getInt64(kernel_channels), + ir_builder_.getInt64(kernel_filters), + ir_builder_.getInt64(output_rows), + ir_builder_.getInt64(output_cols), + ir_builder_.getInt64(row_stride), + ir_builder_.getInt64(col_stride), + ir_builder_.getInt64(padding_top), + ir_builder_.getInt64(padding_bottom), + ir_builder_.getInt64(padding_left), + ir_builder_.getInt64(padding_right), + ir_builder_.getInt64(lhs_row_dilation), + ir_builder_.getInt64(lhs_col_dilation), + ir_builder_.getInt64(rhs_row_dilation), + ir_builder_.getInt64(rhs_col_dilation), + }); + target_address->setName(AsStringRef(IrName(convolution))); + emitted_value_[convolution] = target_address; return Status::OK(); } @@ -1349,7 +1367,9 @@ Status IrEmitter::HandleBatchNormTraining(HloInstruction* batch_norm_training) { mean_array, &ir_builder_) .EmitLoop(IrName(batch_norm_training, "mean_var"))); - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(batch_norm_training)); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(batch_norm_training)); + TF_ASSIGN_OR_RETURN( const BufferAllocation::Slice slice, assignment_.GetUniqueSlice(batch_norm_training, /*index=*/{0})); @@ -1405,8 +1425,11 @@ Status 
IrEmitter::HandleBatchNormTraining(HloInstruction* batch_norm_training) { target_array, &ir_builder_) .EmitLoop(IrName(batch_norm_training, "normalize"))); - llvm_ir::EmitTuple(GetIrArrayForOp(batch_norm_training), - {normalized, mean, var}, &ir_builder_); + llvm_ir::EmitTuple( + llvm_ir::IrArray(target_address, batch_norm_training->shape()), + {normalized, mean, var}, &ir_builder_); + emitted_value_[batch_norm_training] = target_address; + return Status::OK(); } @@ -1766,7 +1789,6 @@ StatusOr IrEmitter::EmitVectorizedReduce( } CHECK(!ShapeUtil::IsTuple(reduce->shape())); - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(reduce)); // We know we're not reducing over the most minor dimension, which means we // can lower the reduction loop as: @@ -1829,7 +1851,10 @@ StatusOr IrEmitter::EmitVectorizedReduce( reduction_generator, array_index, vector_type, init_value, arg, dimensions, element_alignment)); - llvm_ir::IrArray target_array = GetIrArrayForOp(reduce); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(reduce)); + llvm_ir::IrArray target_array(target_address, reduce->shape()); + AddAliasingInformationToIrArray(*reduce, &target_array); llvm::Value* output_address = target_array.EmitArrayElementAddress(array_index, &ir_builder_); EmitShardedVectorStore(output_address, accumulator, element_alignment, @@ -1861,7 +1886,10 @@ StatusOr IrEmitter::EmitVectorizedReduce( reduction_generator, array_index, vector_type, init_value, arg, dimensions, element_alignment)); - llvm_ir::IrArray target_array = GetIrArrayForOp(reduce); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(reduce)); + llvm_ir::IrArray target_array(target_address, reduce->shape()); + AddAliasingInformationToIrArray(*reduce, &target_array); llvm::Value* output_address = target_array.EmitArrayElementAddress(array_index, &ir_builder_); EmitShardedVectorStore(output_address, accumulator, element_alignment, @@ -1872,6 +1900,10 @@ StatusOr 
IrEmitter::EmitVectorizedReduce( ir_builder_.SetInsertPoint(outermost_loop_exit_block); } + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(reduce)); + + emitted_value_[reduce] = target_address; return true; } @@ -1971,7 +2003,9 @@ Status IrEmitter::HandleSlice(HloInstruction* slice, HloInstruction* operand) { return DefaultAction(slice); } - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(slice)); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(slice)); + emitted_value_[slice] = target_address; if (ShapeUtil::HasZeroElements(slice->shape())) { return Status::OK(); @@ -2043,7 +2077,8 @@ Status IrEmitter::HandleSlice(HloInstruction* slice, HloInstruction* operand) { outer_dims.push_back(memcpy_dim); } - llvm_ir::IrArray target_array = GetIrArrayForOp(slice); + llvm_ir::IrArray target_array(target_address, slice->shape()); + AddAliasingInformationToIrArray(*slice, &target_array); const int64 num_outer_loops = outer_dims.size(); llvm_ir::ForLoopNest loops(IrName(slice), &ir_builder_); @@ -2096,7 +2131,10 @@ Status IrEmitter::HandleDynamicSlice(HloInstruction* dynamic_slice, HloInstruction* operand, HloInstruction* /*start_indices*/) { if (ShapeUtil::IsScalar(dynamic_slice->shape())) { - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dynamic_slice)); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(dynamic_slice)); + target_address->setName(AsStringRef(IrName(dynamic_slice))); + emitted_value_[dynamic_slice] = target_address; return EmitMemcpy(*operand, *dynamic_slice); } return DefaultAction(dynamic_slice); @@ -2152,7 +2190,10 @@ Status IrEmitter::HandleDynamicUpdateSlice(HloInstruction* dynamic_update_slice, HloInstruction* update, HloInstruction* start_indices) { if (ShapeUtil::IsScalar(dynamic_update_slice->shape())) { - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dynamic_update_slice)); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(dynamic_update_slice)); + 
target_address->setName(AsStringRef(IrName(dynamic_update_slice))); + emitted_value_[dynamic_update_slice] = target_address; return EmitMemcpy(*update, *dynamic_update_slice); } else if (CanUpdateDynamicSliceInPlace(assignment_, dynamic_update_slice)) { VLOG(2) << "Emitting HandleDynamicUpdateSlice in-place."; @@ -2206,7 +2247,9 @@ Status IrEmitter::HandleDynamicUpdateSlice(HloInstruction* dynamic_update_slice, llvm_ir::LoopEmitter(loop_body_emitter, update->shape(), &ir_builder_) .EmitLoop(IrName(dynamic_update_slice, "in_place"))); - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dynamic_update_slice)); + TF_ASSIGN_OR_RETURN(llvm::Value * dynamic_update_slice_address, + EmitTargetAddressForOp(dynamic_update_slice)); + emitted_value_[dynamic_update_slice] = dynamic_update_slice_address; return Status::OK(); } return DefaultAction(dynamic_update_slice); @@ -2305,8 +2348,11 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { llvm_ir::IrArray rhs_array(GetIrArrayForOp(rhs)); Shape target_shape = fusion->shape(); - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(fusion)); - llvm_ir::IrArray target_array = GetIrArrayForOp(fusion); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(fusion)); + llvm_ir::IrArray target_array(target_address, target_shape); + AddAliasingInformationToIrArray(*fusion, &target_array); + VLOG(2) << "HandleFusion kTransposeDot: "; VLOG(2) << " lhs operand: " << llvm_ir::DumpToString(*lhs_array.GetBasePointer()); @@ -2320,6 +2366,8 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { *dot, dot->operand(0)->IsRank2Transpose(), dot->operand(1)->IsRank2Transpose(), target_array, lhs_array, rhs_array, GetExecutableRunOptionsArgument(), &ir_builder_, hlo_module_config_)); + + emitted_value_[fusion] = target_address; return Status::OK(); } else if (fusion->fusion_kind() == HloInstruction::FusionKind::kLoop) { std::vector parameter_arrays; @@ -2345,9 +2393,14 @@ Status IrEmitter::HandleCall(HloInstruction* call) { 
parameter_addresses.push_back(GetEmittedValueFor(operand)); } - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(call)); + TF_ASSIGN_OR_RETURN(llvm::Value * output_address, + EmitTargetAddressForOp(call)); + output_address->setName(AsStringRef(IrName(call))); + EmitArrayFunctionCallInto(call_ir_function, parameter_addresses, - emitted_value_[call], computation->name()); + output_address, computation->name()); + + emitted_value_[call] = output_address; return Status::OK(); } @@ -2376,13 +2429,17 @@ Status IrEmitter::HandleCustomCall( /*Params=*/{i8_ptr_type, operands_alloca->getType()}, /*isVarArg=*/false))); - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(custom_call)); - auto* output_address_arg = ir_builder_.CreatePointerCast( - GetEmittedValueFor(custom_call), i8_ptr_type); + TF_ASSIGN_OR_RETURN(llvm::Value * output_address, + EmitTargetAddressForOp(custom_call)); + output_address->setName(AsStringRef(IrName(custom_call))); + + auto* output_address_arg = + ir_builder_.CreatePointerCast(output_address, i8_ptr_type); ir_builder_.CreateCall(custom_call_ir_function, {output_address_arg, operands_alloca}); + emitted_value_[custom_call] = output_address; return Status::OK(); } @@ -2526,8 +2583,10 @@ StatusOr IrEmitter::EmitFastConcatenate( llvm::Type* i8_ptr_type = ir_builder_.getInt8PtrTy(); llvm::Type* i8_type = ir_builder_.getInt8Ty(); - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(concatenate)); - llvm_ir::IrArray target_array = GetIrArrayForOp(concatenate); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(concatenate)); + + llvm_ir::IrArray target_array(target_address, output_shape); llvm_ir::ForLoopNest loops(IrName(concatenate), &ir_builder_); llvm_ir::IrArray::Index outer_dims_index = @@ -2544,6 +2603,8 @@ StatusOr IrEmitter::EmitFastConcatenate( unsigned primitive_type_size = ShapeUtil::ByteSizeOfPrimitiveType(primitive_type); + AddAliasingInformationToIrArray(*concatenate, &target_array); + // Contiguous subregions from each operand to the 
concatenate contribute to a // contiguous subregion in the target buffer starting at target_region_begin. llvm::Value* target_region_begin = ir_builder_.CreateBitCast( @@ -2586,6 +2647,8 @@ StatusOr IrEmitter::EmitFastConcatenate( SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), &ir_builder_); } + emitted_value_[concatenate] = target_address; + return true; } @@ -2779,6 +2842,15 @@ Status IrEmitter::Preprocess(HloInstruction* hlo) { } Status IrEmitter::Postprocess(HloInstruction* hlo) { + // Set the name of the emitted llvm::Value to IrName(hlo). Outfeed and send + // the only ops that don't emit a value. + if (hlo->opcode() != HloOpcode::kOutfeed && + hlo->opcode() != HloOpcode::kSend) { + auto it = emitted_value_.find(hlo); + CHECK(it != emitted_value_.end()); + it->second->setName(AsStringRef(IrName(hlo))); + } + if (auto* prof_counter = GetProfileCounterFor(hlo)) { profiling_state_.RecordCycleDelta(&ir_builder_, hlo, prof_counter); } @@ -2955,10 +3027,10 @@ llvm::Value* IrEmitter::EmitArrayFunctionCall( return return_value_buffer; } -Status IrEmitter::EmitTargetAddressForOp(const HloInstruction* op) { - llvm::Value* addr; - const Shape& target_shape = op->shape(); - if (op == op->parent()->root_instruction()) { +StatusOr IrEmitter::EmitTargetAddressForOp( + const HloInstruction* op, const ShapeIndex& shape_index) { + const Shape& target_shape = ShapeUtil::GetSubshape(op->shape(), shape_index); + if (op == op->parent()->root_instruction() && shape_index.empty()) { // For the root node, we write directly to the output buffer of the // function. 
llvm::Argument* retval = GetResultArgument(); @@ -2968,18 +3040,15 @@ Status IrEmitter::EmitTargetAddressForOp(const HloInstruction* op) { attr_builder.addDereferenceableAttr(ByteSizeOf(target_shape)); retval->addAttrs(attr_builder); } - addr = ir_builder_.CreateBitCast(retval, + return ir_builder_.CreateBitCast(retval, IrShapeType(target_shape)->getPointerTo()); - } else { - // For other nodes, we need the temporary buffer allocated for this node to - // write the result into. - TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice, - assignment_.GetUniqueTopLevelSlice(op)); - addr = EmitTempBufferPointer(slice, target_shape); - } - addr->setName(AsStringRef(IrName(op))); - emitted_value_[op] = addr; - return Status::OK(); + } + + // For other nodes, we need the temporary buffer allocated for this node to + // write the result into. + TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice, + assignment_.GetUniqueTopLevelSlice(op)); + return EmitTempBufferPointer(slice, target_shape); } Status IrEmitter::EmitTargetElementLoop( @@ -2993,9 +3062,12 @@ Status IrEmitter::EmitTargetElementLoop( const llvm_ir::ElementGenerator& element_generator) { VLOG(2) << "EmitTargetElementLoop: " << target_op->ToString(); + // target_address will hold the address of the target buffer we will write the + // result of the computation into. const Shape& target_shape = target_op->shape(); - TF_RETURN_IF_ERROR(EmitTargetAddressForOp(target_op)); - llvm_ir::IrArray target_array = GetIrArrayForOp(target_op); + TF_ASSIGN_OR_RETURN(llvm::Value * target_address, + EmitTargetAddressForOp(target_op)); + VLOG(2) << " target address: " << llvm_ir::DumpToString(*target_address); if (target_op->IsMultiOutputFusion()) { // For multiple outputs fusion, we need to emit each operand and the root. 
@@ -3018,9 +3090,13 @@ Status IrEmitter::EmitTargetElementLoop( for (int64 i = 0; i < output_arrays.size(); ++i) { tuple_operand_ptrs.push_back(output_arrays[i].GetBasePointer()); } - llvm_ir::EmitTuple(target_array, tuple_operand_ptrs, &ir_builder_); + llvm_ir::EmitTuple(llvm_ir::IrArray(target_address, target_shape), + tuple_operand_ptrs, &ir_builder_); } else { + llvm_ir::IrArray target_array(target_address, target_shape); + AddAliasingInformationToIrArray(*target_op, &target_array); + if (ShouldEmitParallelLoopFor(*target_op)) { TF_RETURN_IF_ERROR(EmitParallelTargetElementLoop( target_shape, element_generator, IrName(target_op), &target_array)); @@ -3030,6 +3106,8 @@ Status IrEmitter::EmitTargetElementLoop( .EmitLoop(IrName(target_op))); } } + + emitted_value_[target_op] = target_address; return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index fd9ee71799..05663b6038 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -353,10 +353,11 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status EmitMemcpy(const HloInstruction& source, const HloInstruction& destination); - // Emits IR to compute the target address of the buffer for the given op. - // After calling this function, you can get a pointer to this buffer by - // calling GetIrArrayForOp or GetEmittedValueFor. - Status EmitTargetAddressForOp(const HloInstruction* op); + // Emit IR to compute the target address of the buffer for the given op. + // The returned Value is a pointer to a IR type that represents the op's + // element type. + StatusOr EmitTargetAddressForOp( + const HloInstruction* op, const ShapeIndex& shape_index = {}); // Structurizes "array_elements" into an MD array that represents "shape". 
// This is a recursive function, and "dimension_index" indicates the index of diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 29221d2d29..ffd8018827 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1894,16 +1894,11 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( Shape inferred_shape = ShapeUtil::MakeShape(operand.element_type(), new_sizes); - VLOG(3) << "Reshape inferred shape: " - << ShapeUtil::HumanString(inferred_shape); if (ShapeUtil::ElementsIn(operand) != ShapeUtil::ElementsIn(inferred_shape)) { return InvalidArgument( - "reshape operation has mismatched element counts: from=%lld (%s) " - "to=%lld (%s)", - ShapeUtil::ElementsIn(operand), ShapeUtil::HumanString(operand).c_str(), - ShapeUtil::ElementsIn(inferred_shape), - ShapeUtil::HumanString(inferred_shape).c_str()); + "reshape operation has mismatched element counts: from=%lld to=%lld", + ShapeUtil::ElementsIn(operand), ShapeUtil::ElementsIn(inferred_shape)); } std::vector indices(ShapeUtil::Rank(operand)); diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 2876a79dd8..061a4e190f 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -39,60 +39,30 @@ limitations under the License. namespace xla { -/* static */ ::testing::AssertionResult LiteralTestUtil::EqualShapes( - const Shape& expected, const Shape& actual) { - if (ShapeUtil::IsTuple(expected) != ShapeUtil::IsTuple(actual)) { - return ::testing::AssertionFailure() - << "tupleness-mismatch! 
want: " << ShapeUtil::HumanString(expected) - << " got: " << ShapeUtil::HumanString(actual); - } +/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, + const Shape& actual) { + ASSERT_EQ(ShapeUtil::IsTuple(expected), ShapeUtil::IsTuple(actual)); if (ShapeUtil::IsTuple(expected)) { - if (ShapeUtil::TupleElementCount(expected) != - ShapeUtil::TupleElementCount(actual)) { - return ::testing::AssertionFailure() - << "want tuple element count: " - << ShapeUtil::TupleElementCount(expected) - << " got tuple element count: " - << ShapeUtil::TupleElementCount(actual); - } + ASSERT_EQ(ShapeUtil::TupleElementCount(expected), + ShapeUtil::TupleElementCount(actual)); for (int i = 0; i < expected.tuple_shapes_size(); ++i) { - ::testing::AssertionResult result = - EqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); - if (!result) { - return result; - } + AssertEqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); } } else { - if (ShapeUtil::Rank(expected) != ShapeUtil::Rank(actual)) { - return ::testing::AssertionFailure() - << "want rank of: " << ShapeUtil::HumanString(expected) - << " got rank of: " << ShapeUtil::HumanString(actual); - } - if (expected.element_type() != actual.element_type()) { - return ::testing::AssertionFailure() - << PrimitiveType_Name(expected.element_type()) << " vs " - << PrimitiveType_Name(actual.element_type()); - } - if (expected.dimensions_size() != actual.dimensions_size()) { - return ::testing::AssertionFailure() - << "want dimensions_size " << expected.dimensions_size() - << " got dimensions_size " << actual.dimensions_size(); - } + ASSERT_EQ(ShapeUtil::Rank(expected), ShapeUtil::Rank(actual)) + << "want rank of: " << ShapeUtil::HumanString(expected) + << " got rank of: " << ShapeUtil::HumanString(actual); + ASSERT_EQ(expected.element_type(), actual.element_type()) + << PrimitiveType_Name(expected.element_type()) << " vs " + << PrimitiveType_Name(actual.element_type()); + 
ASSERT_EQ(expected.dimensions_size(), actual.dimensions_size()); for (int i = 0; i < expected.dimensions_size(); ++i) { - if (expected.dimensions(i) != actual.dimensions(i)) { - return ::testing::AssertionFailure() - << "mismatch in dimension #" << i - << " expected: " << ShapeUtil::HumanString(expected) - << " actual: " << ShapeUtil::HumanString(actual); - } + ASSERT_EQ(expected.dimensions(i), actual.dimensions(i)) + << "mismatch in dimension #" << i + << " expected: " << ShapeUtil::HumanString(expected) + << " actual: " << ShapeUtil::HumanString(actual); } } - return ::testing::AssertionSuccess(); -} - -/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, - const Shape& actual) { - ASSERT_TRUE(EqualShapes(expected, actual)); } /* static */ void LiteralTestUtil::AssertEqualShapesAndLayouts( @@ -295,14 +265,7 @@ class NearComparator { VLOG(1) << "actual:"; XLA_VLOG_LINES(1, actual.ToString()); - // If the shapes mismatch, we simply fail the expectation instead of - // printing out data, as it's a type error rather than a value error. - ::testing::AssertionResult equal_shapes = - LiteralTestUtil::EqualShapes(expected.shape(), actual.shape()); - if (!equal_shapes) { - EXPECT_TRUE(equal_shapes); - return false; - } + LiteralTestUtil::AssertEqualShapes(expected.shape(), actual.shape()); // Set up members used during the comparison. num_miscompares_ = 0; diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h index 467d44b857..f645c4e8dc 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.h +++ b/tensorflow/compiler/xla/tests/literal_test_util.h @@ -50,8 +50,6 @@ class LiteralTestUtil { public: // Asserts that the given shapes have the same rank, dimension sizes, and // primitive types. 
- static ::testing::AssertionResult EqualShapes(const Shape& expected, - const Shape& actual); static void AssertEqualShapes(const Shape& expected, const Shape& actual); // Asserts that the provided shapes are equal as defined in AssertEqualShapes diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc index 72c68f24a0..bb7160e3a0 100644 --- a/tensorflow/compiler/xla/tests/reshape_test.cc +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -47,7 +47,7 @@ class ReshapeTest : public ClientLibraryTestBase { }; // Collapses 2-dimensional pseudo-scalar (single-element array) to 1 dimension. -XLA_TEST_F(ReshapeTest, CollapseTrivial1x1) { +XLA_TEST_F(ReshapeTest, Trivial1x1) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR2({{1.0}}); builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1}); @@ -55,22 +55,6 @@ XLA_TEST_F(ReshapeTest, CollapseTrivial1x1) { ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); } -XLA_TEST_F(ReshapeTest, CollapseTrivialR1EmptyDims) { - ComputationBuilder builder(client_, TestName()); - auto a = builder.ConstantR1({1.0}); - builder.Collapse(/*operand=*/a, /*dimensions=*/{}); - - ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); -} - -XLA_TEST_F(ReshapeTest, CollapseTrivialR1OnlyDim) { - ComputationBuilder builder(client_, TestName()); - auto a = builder.ConstantR1({1.0}); - builder.Collapse(/*operand=*/a, /*dimensions=*/{0}); - - ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); -} - // Collapses 2-dimensional pseudo-scalar (single-element array) to scalar. 
XLA_TEST_F(ReshapeTest, SingleElementArrayToScalar) { ComputationBuilder builder(client_, TestName()); diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 11e063d8d2..f57834cfbe 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -2008,7 +2008,7 @@ bool ExecutorState::NodeDone(const Status& s, const Node* node, NodeExecStatsWrapper* stats, TaggedNodeReadyQueue* inline_ready) { nodestats::SetAllEnd(stats); - if (stats_collector_ != nullptr && !SetTimelineLabel(node, stats)) { + if (!SetTimelineLabel(node, stats)) { // Only record non-transfer nodes. // Transfers 'stats' ownership to 'stats_collector_'. stats_collector_->Save(impl_->params_.device->name(), stats); -- GitLab From d749f56a3e0b17a5fe5f3252446223b84e485f04 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Fri, 6 Oct 2017 11:06:12 -0700 Subject: [PATCH 123/909] [XLA] Fix a bug in ComputationBuilder::Collapse and add more tests/docs. Also updated test infrastructure so a shape mismatch does not cause a fatal crash in index_util, but rather reports an appropriate test failure message. 
PiperOrigin-RevId: 171315165 --- tensorflow/compiler/xla/client/client.cc | 1 + .../xla/client/computation_builder.cc | 13 ++++ .../compiler/xla/client/computation_builder.h | 10 +++ .../compiler/xla/service/shape_inference.cc | 9 ++- .../compiler/xla/tests/literal_test_util.cc | 73 ++++++++++++++----- .../compiler/xla/tests/literal_test_util.h | 2 + tensorflow/compiler/xla/tests/reshape_test.cc | 18 ++++- 7 files changed, 105 insertions(+), 21 deletions(-) diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index 387253617e..7db2ea79fb 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -206,6 +206,7 @@ StatusOr> Client::Execute( *request.mutable_execution_options() = *execution_options; } for (GlobalData* argument : arguments) { + CHECK(argument != nullptr) << "Argument pointers must not be null."; *request.add_arguments() = argument->handle(); } diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 15a713513f..925dcd36c0 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -489,6 +489,16 @@ ComputationDataHandle ComputationBuilder::Collapse( } std::unique_ptr original_shape = shape_or_status.ConsumeValueOrDie(); + VLOG(3) << "original shape: " << ShapeUtil::HumanString(*original_shape); + VLOG(3) << "dims to collapse: " + << tensorflow::str_util::Join(dims_to_collapse, ","); + + if (dims_to_collapse.size() <= 1) { + // Not collapsing anything, trivially we can return the operand versus + // enqueueing a trivial reshape. 
+ return operand; + } + std::vector new_sizes; for (int i = 0; i < ShapeUtil::Rank(*original_shape); ++i) { if (i <= dims_to_collapse.front() || i > dims_to_collapse.back()) { @@ -498,6 +508,9 @@ ComputationDataHandle ComputationBuilder::Collapse( } } + VLOG(3) << "new sizes: [" << tensorflow::str_util::Join(new_sizes, ",") + << "]"; + return Reshape(operand, new_sizes); } diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 73972c1290..7014685ea5 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -201,6 +201,16 @@ class ComputationBuilder { // {x=1024, y=32} by collapsing dims {0, 1, 2}. Collapsing dimensions must // be a consecutive, in-order subsequence of the operand dimensions. // + // Note that collapsing a single dimension does nothing: + // + // {256} collapsing {0} => {256} + // {1} collapsing {0} => {1} + // + // Collapsing multiple dimensions produces a single result dimension: + // + // {256, 2} collapsing {0,1} => {512} + // {256, 2, 3} collapsing {0,1} => {512, 3} + // // This could potentially cause data to be moved -- it provides a more // structured form of reshaping than an arbitrary Reshape operation. 
ComputationDataHandle Collapse(const ComputationDataHandle& operand, diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index ffd8018827..29221d2d29 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -1894,11 +1894,16 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( Shape inferred_shape = ShapeUtil::MakeShape(operand.element_type(), new_sizes); + VLOG(3) << "Reshape inferred shape: " + << ShapeUtil::HumanString(inferred_shape); if (ShapeUtil::ElementsIn(operand) != ShapeUtil::ElementsIn(inferred_shape)) { return InvalidArgument( - "reshape operation has mismatched element counts: from=%lld to=%lld", - ShapeUtil::ElementsIn(operand), ShapeUtil::ElementsIn(inferred_shape)); + "reshape operation has mismatched element counts: from=%lld (%s) " + "to=%lld (%s)", + ShapeUtil::ElementsIn(operand), ShapeUtil::HumanString(operand).c_str(), + ShapeUtil::ElementsIn(inferred_shape), + ShapeUtil::HumanString(inferred_shape).c_str()); } std::vector indices(ShapeUtil::Rank(operand)); diff --git a/tensorflow/compiler/xla/tests/literal_test_util.cc b/tensorflow/compiler/xla/tests/literal_test_util.cc index 061a4e190f..2876a79dd8 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.cc +++ b/tensorflow/compiler/xla/tests/literal_test_util.cc @@ -39,30 +39,60 @@ limitations under the License. namespace xla { -/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, - const Shape& actual) { - ASSERT_EQ(ShapeUtil::IsTuple(expected), ShapeUtil::IsTuple(actual)); +/* static */ ::testing::AssertionResult LiteralTestUtil::EqualShapes( + const Shape& expected, const Shape& actual) { + if (ShapeUtil::IsTuple(expected) != ShapeUtil::IsTuple(actual)) { + return ::testing::AssertionFailure() + << "tupleness-mismatch! 
want: " << ShapeUtil::HumanString(expected) + << " got: " << ShapeUtil::HumanString(actual); + } if (ShapeUtil::IsTuple(expected)) { - ASSERT_EQ(ShapeUtil::TupleElementCount(expected), - ShapeUtil::TupleElementCount(actual)); + if (ShapeUtil::TupleElementCount(expected) != + ShapeUtil::TupleElementCount(actual)) { + return ::testing::AssertionFailure() + << "want tuple element count: " + << ShapeUtil::TupleElementCount(expected) + << " got tuple element count: " + << ShapeUtil::TupleElementCount(actual); + } for (int i = 0; i < expected.tuple_shapes_size(); ++i) { - AssertEqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); + ::testing::AssertionResult result = + EqualShapes(expected.tuple_shapes(i), actual.tuple_shapes(i)); + if (!result) { + return result; + } } } else { - ASSERT_EQ(ShapeUtil::Rank(expected), ShapeUtil::Rank(actual)) - << "want rank of: " << ShapeUtil::HumanString(expected) - << " got rank of: " << ShapeUtil::HumanString(actual); - ASSERT_EQ(expected.element_type(), actual.element_type()) - << PrimitiveType_Name(expected.element_type()) << " vs " - << PrimitiveType_Name(actual.element_type()); - ASSERT_EQ(expected.dimensions_size(), actual.dimensions_size()); + if (ShapeUtil::Rank(expected) != ShapeUtil::Rank(actual)) { + return ::testing::AssertionFailure() + << "want rank of: " << ShapeUtil::HumanString(expected) + << " got rank of: " << ShapeUtil::HumanString(actual); + } + if (expected.element_type() != actual.element_type()) { + return ::testing::AssertionFailure() + << PrimitiveType_Name(expected.element_type()) << " vs " + << PrimitiveType_Name(actual.element_type()); + } + if (expected.dimensions_size() != actual.dimensions_size()) { + return ::testing::AssertionFailure() + << "want dimensions_size " << expected.dimensions_size() + << " got dimensions_size " << actual.dimensions_size(); + } for (int i = 0; i < expected.dimensions_size(); ++i) { - ASSERT_EQ(expected.dimensions(i), actual.dimensions(i)) - << "mismatch in dimension 
#" << i - << " expected: " << ShapeUtil::HumanString(expected) - << " actual: " << ShapeUtil::HumanString(actual); + if (expected.dimensions(i) != actual.dimensions(i)) { + return ::testing::AssertionFailure() + << "mismatch in dimension #" << i + << " expected: " << ShapeUtil::HumanString(expected) + << " actual: " << ShapeUtil::HumanString(actual); + } } } + return ::testing::AssertionSuccess(); +} + +/* static */ void LiteralTestUtil::AssertEqualShapes(const Shape& expected, + const Shape& actual) { + ASSERT_TRUE(EqualShapes(expected, actual)); } /* static */ void LiteralTestUtil::AssertEqualShapesAndLayouts( @@ -265,7 +295,14 @@ class NearComparator { VLOG(1) << "actual:"; XLA_VLOG_LINES(1, actual.ToString()); - LiteralTestUtil::AssertEqualShapes(expected.shape(), actual.shape()); + // If the shapes mismatch, we simply fail the expectation instead of + // printing out data, as it's a type error rather than a value error. + ::testing::AssertionResult equal_shapes = + LiteralTestUtil::EqualShapes(expected.shape(), actual.shape()); + if (!equal_shapes) { + EXPECT_TRUE(equal_shapes); + return false; + } // Set up members used during the comparison. num_miscompares_ = 0; diff --git a/tensorflow/compiler/xla/tests/literal_test_util.h b/tensorflow/compiler/xla/tests/literal_test_util.h index f645c4e8dc..467d44b857 100644 --- a/tensorflow/compiler/xla/tests/literal_test_util.h +++ b/tensorflow/compiler/xla/tests/literal_test_util.h @@ -50,6 +50,8 @@ class LiteralTestUtil { public: // Asserts that the given shapes have the same rank, dimension sizes, and // primitive types. 
+ static ::testing::AssertionResult EqualShapes(const Shape& expected, + const Shape& actual); static void AssertEqualShapes(const Shape& expected, const Shape& actual); // Asserts that the provided shapes are equal as defined in AssertEqualShapes diff --git a/tensorflow/compiler/xla/tests/reshape_test.cc b/tensorflow/compiler/xla/tests/reshape_test.cc index bb7160e3a0..72c68f24a0 100644 --- a/tensorflow/compiler/xla/tests/reshape_test.cc +++ b/tensorflow/compiler/xla/tests/reshape_test.cc @@ -47,7 +47,7 @@ class ReshapeTest : public ClientLibraryTestBase { }; // Collapses 2-dimensional pseudo-scalar (single-element array) to 1 dimension. -XLA_TEST_F(ReshapeTest, Trivial1x1) { +XLA_TEST_F(ReshapeTest, CollapseTrivial1x1) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR2({{1.0}}); builder.Collapse(/*operand=*/a, /*dimensions=*/{0, 1}); @@ -55,6 +55,22 @@ XLA_TEST_F(ReshapeTest, Trivial1x1) { ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); } +XLA_TEST_F(ReshapeTest, CollapseTrivialR1EmptyDims) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({1.0}); + builder.Collapse(/*operand=*/a, /*dimensions=*/{}); + + ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); +} + +XLA_TEST_F(ReshapeTest, CollapseTrivialR1OnlyDim) { + ComputationBuilder builder(client_, TestName()); + auto a = builder.ConstantR1({1.0}); + builder.Collapse(/*operand=*/a, /*dimensions=*/{0}); + + ComputeAndCompareR1(&builder, {1.0f}, {}, zero_error_spec_); +} + // Collapses 2-dimensional pseudo-scalar (single-element array) to scalar. XLA_TEST_F(ReshapeTest, SingleElementArrayToScalar) { ComputationBuilder builder(client_, TestName()); -- GitLab From ce2f89c8bfdbef373c1b1ff9a1c6818f6bf462f8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 11:24:00 -0700 Subject: [PATCH 124/909] Fix stats_collector_ null pointer error. 
PiperOrigin-RevId: 171318477 --- tensorflow/core/common_runtime/executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index f57834cfbe..11e063d8d2 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -2008,7 +2008,7 @@ bool ExecutorState::NodeDone(const Status& s, const Node* node, NodeExecStatsWrapper* stats, TaggedNodeReadyQueue* inline_ready) { nodestats::SetAllEnd(stats); - if (!SetTimelineLabel(node, stats)) { + if (stats_collector_ != nullptr && !SetTimelineLabel(node, stats)) { // Only record non-transfer nodes. // Transfers 'stats' ownership to 'stats_collector_'. stats_collector_->Save(impl_->params_.device->name(), stats); -- GitLab From 7fceb8d879dd23a2fd15403d216367e5e8f52b56 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 6 Oct 2017 11:34:03 -0700 Subject: [PATCH 125/909] [XLA:CPU] Make EmitTargetAddressForOp return void (well, technically Status). This is a general cleanup -- less repeated code -- but it's also part of an effort to use IrArray more and llvm::Value less. In particular, many callsites would take the llvm::Value returned by EmitTargetAddressForOp and create an IrArray out of it, but then never attach AA info to that array. Having this function return void forces you to call GetIrArrayForOp(), which attaches the AA metadata appropriately. This change also gets rid of an unused arg to EmitTargetAddressForOp. 
PiperOrigin-RevId: 171320201 --- .../compiler/xla/service/cpu/ir_emitter.cc | 242 ++++++------------ .../compiler/xla/service/cpu/ir_emitter.h | 9 +- 2 files changed, 86 insertions(+), 165 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 4375f13a0e..e4fb7c0496 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -291,8 +291,7 @@ Status IrEmitter::HandleConstant(HloInstruction* constant, Status IrEmitter::HandleCopy(HloInstruction* copy) { if (ShapeUtil::IsTuple(copy->shape())) { // kCopy shallow copies a tuple so just memcpy the top-level buffer. - TF_ASSIGN_OR_RETURN(llvm::Value * copy_value, EmitTargetAddressForOp(copy)); - emitted_value_[copy] = copy_value; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(copy)); return EmitMemcpy(*(copy->operand(0)), *copy); } else { // Use the elemental emitter for non-tuple shapes. @@ -395,9 +394,7 @@ Status IrEmitter::HandleSelect(HloInstruction* select, HloInstruction* pred, TF_RET_CHECK(pred->shape().element_type() == PRED); if (ShapeUtil::IsTuple(select->shape())) { - TF_ASSIGN_OR_RETURN(llvm::Value * output_address, - EmitTargetAddressForOp(select)); - emitted_value_[select] = output_address; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(select)); llvm_ir::EmitTupleSelect(GetIrArrayForOp(select), GetIrArrayForOp(pred), GetEmittedValueFor(on_true), GetEmittedValueFor(on_false), &ir_builder_); @@ -414,8 +411,8 @@ Status IrEmitter::HandleInfeed(HloInstruction* infeed) { // The infeed operation produces data (dequeued from the infeed queue) at this // address, which has been provided by buffer assignment. 
- TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(infeed)); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(infeed)); + llvm_ir::IrArray infeed_array = GetIrArrayForOp(infeed); if (ShapeUtil::IsTuple(shape)) { TF_RET_CHECK(!ShapeUtil::IsNestedTuple(shape)); @@ -433,9 +430,9 @@ Status IrEmitter::HandleInfeed(HloInstruction* infeed) { ShapeUtil::GetTupleElementShape(shape, i); // Only the outer tuple buffer's target address is obtained from - // EmitTargetAddressForOp to handle the case when Infeed is the - // root instruction. Target addresses for internal elements can - // be obtained from EmitTempBufferPointer. + // GetEmittedValueFor, to handle the case when Infeed is the root + // instruction. Target addresses for internal elements can be obtained + // from EmitTempBufferPointer. llvm::Value* tuple_element_address = EmitTempBufferPointer(buffer, tuple_element_shape); @@ -445,15 +442,12 @@ Status IrEmitter::HandleInfeed(HloInstruction* infeed) { tuple_element_addresses.push_back(tuple_element_address); } - llvm_ir::EmitTuple(llvm_ir::IrArray(target_address, shape), - tuple_element_addresses, &ir_builder_); + llvm_ir::EmitTuple(infeed_array, tuple_element_addresses, &ir_builder_); } else { - TF_RETURN_IF_ERROR( - EmitXfeedTransfer(XfeedKind::kInfeed, shape, target_address)); + TF_RETURN_IF_ERROR(EmitXfeedTransfer(XfeedKind::kInfeed, shape, + GetEmittedValueFor(infeed))); } - emitted_value_[infeed] = target_address; - return Status::OK(); } @@ -567,15 +561,12 @@ Status IrEmitter::HandleSort(HloInstruction* sort, HloInstruction* operand) { Status IrEmitter::HandleTuple( HloInstruction* tuple, tensorflow::gtl::ArraySlice operands) { - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(tuple)); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(tuple)); std::vector base_ptrs; for (auto operand : operands) { base_ptrs.push_back(GetEmittedValueFor(operand)); } - llvm_ir::EmitTuple(llvm_ir::IrArray(target_address, tuple->shape()), 
- base_ptrs, &ir_builder_); - emitted_value_[tuple] = target_address; + llvm_ir::EmitTuple(GetIrArrayForOp(tuple), base_ptrs, &ir_builder_); return Status::OK(); } @@ -892,11 +883,8 @@ Status IrEmitter::HandleDot(HloInstruction* dot, HloInstruction* lhs, llvm_ir::IrArray lhs_array(GetIrArrayForOp(lhs)); llvm_ir::IrArray rhs_array(GetIrArrayForOp(rhs)); - Shape target_shape = dot->shape(); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(dot)); - llvm_ir::IrArray target_array(target_address, target_shape); - AddAliasingInformationToIrArray(*dot, &target_array); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dot)); + llvm_ir::IrArray target_array = GetIrArrayForOp(dot); VLOG(2) << "HandleDot: "; VLOG(2) << " lhs operand: " @@ -907,13 +895,10 @@ Status IrEmitter::HandleDot(HloInstruction* dot, HloInstruction* lhs, << llvm_ir::DumpToString(*target_array.GetBasePointer()); // Dot operation is complicated so we delegate to a helper class. - TF_RETURN_IF_ERROR(DotOpEmitter::EmitDotOperation( + return DotOpEmitter::EmitDotOperation( *dot, /*transpose_lhs=*/false, /*transpose_rhs=*/false, target_array, lhs_array, rhs_array, GetExecutableRunOptionsArgument(), &ir_builder_, - hlo_module_config_)); - - emitted_value_[dot] = target_address; - return Status::OK(); + hlo_module_config_); } Status IrEmitter::HandleConvolution(HloInstruction* convolution, @@ -941,8 +926,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution, bool one_dim_convolution = lhs_shape.dimensions_size() == 3; llvm::Value* lhs_address = GetEmittedValueFor(lhs); llvm::Value* rhs_address = GetEmittedValueFor(rhs); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(convolution)); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(convolution)); const ConvolutionDimensionNumbers& dnums = convolution->convolution_dimension_numbers(); @@ -1024,35 +1008,33 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution, conv_func->setDoesNotThrow(); 
conv_func->setOnlyAccessesArgMemory(); ir_builder_.CreateCall( - conv_func, - { - GetExecutableRunOptionsArgument(), - ir_builder_.CreateBitCast(target_address, float_ptr_type), - ir_builder_.CreateBitCast(lhs_address, float_ptr_type), - ir_builder_.CreateBitCast(rhs_address, float_ptr_type), - ir_builder_.getInt64(input_batch), - ir_builder_.getInt64(input_rows), - ir_builder_.getInt64(input_cols), - ir_builder_.getInt64(input_channels), - ir_builder_.getInt64(kernel_rows), - ir_builder_.getInt64(kernel_cols), - ir_builder_.getInt64(kernel_channels), - ir_builder_.getInt64(kernel_filters), - ir_builder_.getInt64(output_rows), - ir_builder_.getInt64(output_cols), - ir_builder_.getInt64(row_stride), - ir_builder_.getInt64(col_stride), - ir_builder_.getInt64(padding_top), - ir_builder_.getInt64(padding_bottom), - ir_builder_.getInt64(padding_left), - ir_builder_.getInt64(padding_right), - ir_builder_.getInt64(lhs_row_dilation), - ir_builder_.getInt64(lhs_col_dilation), - ir_builder_.getInt64(rhs_row_dilation), - ir_builder_.getInt64(rhs_col_dilation), - }); - target_address->setName(AsStringRef(IrName(convolution))); - emitted_value_[convolution] = target_address; + conv_func, { + GetExecutableRunOptionsArgument(), + ir_builder_.CreateBitCast( + GetEmittedValueFor(convolution), float_ptr_type), + ir_builder_.CreateBitCast(lhs_address, float_ptr_type), + ir_builder_.CreateBitCast(rhs_address, float_ptr_type), + ir_builder_.getInt64(input_batch), + ir_builder_.getInt64(input_rows), + ir_builder_.getInt64(input_cols), + ir_builder_.getInt64(input_channels), + ir_builder_.getInt64(kernel_rows), + ir_builder_.getInt64(kernel_cols), + ir_builder_.getInt64(kernel_channels), + ir_builder_.getInt64(kernel_filters), + ir_builder_.getInt64(output_rows), + ir_builder_.getInt64(output_cols), + ir_builder_.getInt64(row_stride), + ir_builder_.getInt64(col_stride), + ir_builder_.getInt64(padding_top), + ir_builder_.getInt64(padding_bottom), + ir_builder_.getInt64(padding_left), + 
ir_builder_.getInt64(padding_right), + ir_builder_.getInt64(lhs_row_dilation), + ir_builder_.getInt64(lhs_col_dilation), + ir_builder_.getInt64(rhs_row_dilation), + ir_builder_.getInt64(rhs_col_dilation), + }); return Status::OK(); } @@ -1367,9 +1349,7 @@ Status IrEmitter::HandleBatchNormTraining(HloInstruction* batch_norm_training) { mean_array, &ir_builder_) .EmitLoop(IrName(batch_norm_training, "mean_var"))); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(batch_norm_training)); - + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(batch_norm_training)); TF_ASSIGN_OR_RETURN( const BufferAllocation::Slice slice, assignment_.GetUniqueSlice(batch_norm_training, /*index=*/{0})); @@ -1425,11 +1405,8 @@ Status IrEmitter::HandleBatchNormTraining(HloInstruction* batch_norm_training) { target_array, &ir_builder_) .EmitLoop(IrName(batch_norm_training, "normalize"))); - llvm_ir::EmitTuple( - llvm_ir::IrArray(target_address, batch_norm_training->shape()), - {normalized, mean, var}, &ir_builder_); - emitted_value_[batch_norm_training] = target_address; - + llvm_ir::EmitTuple(GetIrArrayForOp(batch_norm_training), + {normalized, mean, var}, &ir_builder_); return Status::OK(); } @@ -1789,6 +1766,7 @@ StatusOr IrEmitter::EmitVectorizedReduce( } CHECK(!ShapeUtil::IsTuple(reduce->shape())); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(reduce)); // We know we're not reducing over the most minor dimension, which means we // can lower the reduction loop as: @@ -1851,10 +1829,7 @@ StatusOr IrEmitter::EmitVectorizedReduce( reduction_generator, array_index, vector_type, init_value, arg, dimensions, element_alignment)); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(reduce)); - llvm_ir::IrArray target_array(target_address, reduce->shape()); - AddAliasingInformationToIrArray(*reduce, &target_array); + llvm_ir::IrArray target_array = GetIrArrayForOp(reduce); llvm::Value* output_address = target_array.EmitArrayElementAddress(array_index, 
&ir_builder_); EmitShardedVectorStore(output_address, accumulator, element_alignment, @@ -1886,10 +1861,7 @@ StatusOr IrEmitter::EmitVectorizedReduce( reduction_generator, array_index, vector_type, init_value, arg, dimensions, element_alignment)); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(reduce)); - llvm_ir::IrArray target_array(target_address, reduce->shape()); - AddAliasingInformationToIrArray(*reduce, &target_array); + llvm_ir::IrArray target_array = GetIrArrayForOp(reduce); llvm::Value* output_address = target_array.EmitArrayElementAddress(array_index, &ir_builder_); EmitShardedVectorStore(output_address, accumulator, element_alignment, @@ -1900,10 +1872,6 @@ StatusOr IrEmitter::EmitVectorizedReduce( ir_builder_.SetInsertPoint(outermost_loop_exit_block); } - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(reduce)); - - emitted_value_[reduce] = target_address; return true; } @@ -2003,9 +1971,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice, HloInstruction* operand) { return DefaultAction(slice); } - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(slice)); - emitted_value_[slice] = target_address; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(slice)); if (ShapeUtil::HasZeroElements(slice->shape())) { return Status::OK(); @@ -2077,8 +2043,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice, HloInstruction* operand) { outer_dims.push_back(memcpy_dim); } - llvm_ir::IrArray target_array(target_address, slice->shape()); - AddAliasingInformationToIrArray(*slice, &target_array); + llvm_ir::IrArray target_array = GetIrArrayForOp(slice); const int64 num_outer_loops = outer_dims.size(); llvm_ir::ForLoopNest loops(IrName(slice), &ir_builder_); @@ -2131,10 +2096,7 @@ Status IrEmitter::HandleDynamicSlice(HloInstruction* dynamic_slice, HloInstruction* operand, HloInstruction* /*start_indices*/) { if (ShapeUtil::IsScalar(dynamic_slice->shape())) { - 
TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(dynamic_slice)); - target_address->setName(AsStringRef(IrName(dynamic_slice))); - emitted_value_[dynamic_slice] = target_address; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dynamic_slice)); return EmitMemcpy(*operand, *dynamic_slice); } return DefaultAction(dynamic_slice); @@ -2190,10 +2152,7 @@ Status IrEmitter::HandleDynamicUpdateSlice(HloInstruction* dynamic_update_slice, HloInstruction* update, HloInstruction* start_indices) { if (ShapeUtil::IsScalar(dynamic_update_slice->shape())) { - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(dynamic_update_slice)); - target_address->setName(AsStringRef(IrName(dynamic_update_slice))); - emitted_value_[dynamic_update_slice] = target_address; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dynamic_update_slice)); return EmitMemcpy(*update, *dynamic_update_slice); } else if (CanUpdateDynamicSliceInPlace(assignment_, dynamic_update_slice)) { VLOG(2) << "Emitting HandleDynamicUpdateSlice in-place."; @@ -2247,9 +2206,7 @@ Status IrEmitter::HandleDynamicUpdateSlice(HloInstruction* dynamic_update_slice, llvm_ir::LoopEmitter(loop_body_emitter, update->shape(), &ir_builder_) .EmitLoop(IrName(dynamic_update_slice, "in_place"))); - TF_ASSIGN_OR_RETURN(llvm::Value * dynamic_update_slice_address, - EmitTargetAddressForOp(dynamic_update_slice)); - emitted_value_[dynamic_update_slice] = dynamic_update_slice_address; + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dynamic_update_slice)); return Status::OK(); } return DefaultAction(dynamic_update_slice); @@ -2348,11 +2305,8 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { llvm_ir::IrArray rhs_array(GetIrArrayForOp(rhs)); Shape target_shape = fusion->shape(); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(fusion)); - llvm_ir::IrArray target_array(target_address, target_shape); - AddAliasingInformationToIrArray(*fusion, &target_array); - + 
TF_RETURN_IF_ERROR(EmitTargetAddressForOp(fusion)); + llvm_ir::IrArray target_array = GetIrArrayForOp(fusion); VLOG(2) << "HandleFusion kTransposeDot: "; VLOG(2) << " lhs operand: " << llvm_ir::DumpToString(*lhs_array.GetBasePointer()); @@ -2366,8 +2320,6 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { *dot, dot->operand(0)->IsRank2Transpose(), dot->operand(1)->IsRank2Transpose(), target_array, lhs_array, rhs_array, GetExecutableRunOptionsArgument(), &ir_builder_, hlo_module_config_)); - - emitted_value_[fusion] = target_address; return Status::OK(); } else if (fusion->fusion_kind() == HloInstruction::FusionKind::kLoop) { std::vector parameter_arrays; @@ -2393,14 +2345,9 @@ Status IrEmitter::HandleCall(HloInstruction* call) { parameter_addresses.push_back(GetEmittedValueFor(operand)); } - TF_ASSIGN_OR_RETURN(llvm::Value * output_address, - EmitTargetAddressForOp(call)); - output_address->setName(AsStringRef(IrName(call))); - + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(call)); EmitArrayFunctionCallInto(call_ir_function, parameter_addresses, - output_address, computation->name()); - - emitted_value_[call] = output_address; + emitted_value_[call], computation->name()); return Status::OK(); } @@ -2429,17 +2376,13 @@ Status IrEmitter::HandleCustomCall( /*Params=*/{i8_ptr_type, operands_alloca->getType()}, /*isVarArg=*/false))); - TF_ASSIGN_OR_RETURN(llvm::Value * output_address, - EmitTargetAddressForOp(custom_call)); - output_address->setName(AsStringRef(IrName(custom_call))); - - auto* output_address_arg = - ir_builder_.CreatePointerCast(output_address, i8_ptr_type); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(custom_call)); + auto* output_address_arg = ir_builder_.CreatePointerCast( + GetEmittedValueFor(custom_call), i8_ptr_type); ir_builder_.CreateCall(custom_call_ir_function, {output_address_arg, operands_alloca}); - emitted_value_[custom_call] = output_address; return Status::OK(); } @@ -2583,10 +2526,8 @@ StatusOr IrEmitter::EmitFastConcatenate( 
llvm::Type* i8_ptr_type = ir_builder_.getInt8PtrTy(); llvm::Type* i8_type = ir_builder_.getInt8Ty(); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(concatenate)); - - llvm_ir::IrArray target_array(target_address, output_shape); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(concatenate)); + llvm_ir::IrArray target_array = GetIrArrayForOp(concatenate); llvm_ir::ForLoopNest loops(IrName(concatenate), &ir_builder_); llvm_ir::IrArray::Index outer_dims_index = @@ -2603,8 +2544,6 @@ StatusOr IrEmitter::EmitFastConcatenate( unsigned primitive_type_size = ShapeUtil::ByteSizeOfPrimitiveType(primitive_type); - AddAliasingInformationToIrArray(*concatenate, &target_array); - // Contiguous subregions from each operand to the concatenate contribute to a // contiguous subregion in the target buffer starting at target_region_begin. llvm::Value* target_region_begin = ir_builder_.CreateBitCast( @@ -2647,8 +2586,6 @@ StatusOr IrEmitter::EmitFastConcatenate( SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), &ir_builder_); } - emitted_value_[concatenate] = target_address; - return true; } @@ -2842,15 +2779,6 @@ Status IrEmitter::Preprocess(HloInstruction* hlo) { } Status IrEmitter::Postprocess(HloInstruction* hlo) { - // Set the name of the emitted llvm::Value to IrName(hlo). Outfeed and send - // the only ops that don't emit a value. 
- if (hlo->opcode() != HloOpcode::kOutfeed && - hlo->opcode() != HloOpcode::kSend) { - auto it = emitted_value_.find(hlo); - CHECK(it != emitted_value_.end()); - it->second->setName(AsStringRef(IrName(hlo))); - } - if (auto* prof_counter = GetProfileCounterFor(hlo)) { profiling_state_.RecordCycleDelta(&ir_builder_, hlo, prof_counter); } @@ -3027,10 +2955,10 @@ llvm::Value* IrEmitter::EmitArrayFunctionCall( return return_value_buffer; } -StatusOr IrEmitter::EmitTargetAddressForOp( - const HloInstruction* op, const ShapeIndex& shape_index) { - const Shape& target_shape = ShapeUtil::GetSubshape(op->shape(), shape_index); - if (op == op->parent()->root_instruction() && shape_index.empty()) { +Status IrEmitter::EmitTargetAddressForOp(const HloInstruction* op) { + llvm::Value* addr; + const Shape& target_shape = op->shape(); + if (op == op->parent()->root_instruction()) { // For the root node, we write directly to the output buffer of the // function. llvm::Argument* retval = GetResultArgument(); @@ -3040,15 +2968,18 @@ StatusOr IrEmitter::EmitTargetAddressForOp( attr_builder.addDereferenceableAttr(ByteSizeOf(target_shape)); retval->addAttrs(attr_builder); } - return ir_builder_.CreateBitCast(retval, + addr = ir_builder_.CreateBitCast(retval, IrShapeType(target_shape)->getPointerTo()); - } - - // For other nodes, we need the temporary buffer allocated for this node to - // write the result into. - TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice, - assignment_.GetUniqueTopLevelSlice(op)); - return EmitTempBufferPointer(slice, target_shape); + } else { + // For other nodes, we need the temporary buffer allocated for this node to + // write the result into. 
+ TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice, + assignment_.GetUniqueTopLevelSlice(op)); + addr = EmitTempBufferPointer(slice, target_shape); + } + addr->setName(AsStringRef(IrName(op))); + emitted_value_[op] = addr; + return Status::OK(); } Status IrEmitter::EmitTargetElementLoop( @@ -3062,12 +2993,9 @@ Status IrEmitter::EmitTargetElementLoop( const llvm_ir::ElementGenerator& element_generator) { VLOG(2) << "EmitTargetElementLoop: " << target_op->ToString(); - // target_address will hold the address of the target buffer we will write the - // result of the computation into. const Shape& target_shape = target_op->shape(); - TF_ASSIGN_OR_RETURN(llvm::Value * target_address, - EmitTargetAddressForOp(target_op)); - VLOG(2) << " target address: " << llvm_ir::DumpToString(*target_address); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(target_op)); + llvm_ir::IrArray target_array = GetIrArrayForOp(target_op); if (target_op->IsMultiOutputFusion()) { // For multiple outputs fusion, we need to emit each operand and the root. 
@@ -3090,13 +3018,9 @@ Status IrEmitter::EmitTargetElementLoop( for (int64 i = 0; i < output_arrays.size(); ++i) { tuple_operand_ptrs.push_back(output_arrays[i].GetBasePointer()); } - llvm_ir::EmitTuple(llvm_ir::IrArray(target_address, target_shape), - tuple_operand_ptrs, &ir_builder_); + llvm_ir::EmitTuple(target_array, tuple_operand_ptrs, &ir_builder_); } else { - llvm_ir::IrArray target_array(target_address, target_shape); - AddAliasingInformationToIrArray(*target_op, &target_array); - if (ShouldEmitParallelLoopFor(*target_op)) { TF_RETURN_IF_ERROR(EmitParallelTargetElementLoop( target_shape, element_generator, IrName(target_op), &target_array)); @@ -3106,8 +3030,6 @@ Status IrEmitter::EmitTargetElementLoop( .EmitLoop(IrName(target_op))); } } - - emitted_value_[target_op] = target_address; return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 05663b6038..fd9ee71799 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -353,11 +353,10 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status EmitMemcpy(const HloInstruction& source, const HloInstruction& destination); - // Emit IR to compute the target address of the buffer for the given op. - // The returned Value is a pointer to a IR type that represents the op's - // element type. - StatusOr EmitTargetAddressForOp( - const HloInstruction* op, const ShapeIndex& shape_index = {}); + // Emits IR to compute the target address of the buffer for the given op. + // After calling this function, you can get a pointer to this buffer by + // calling GetIrArrayForOp or GetEmittedValueFor. + Status EmitTargetAddressForOp(const HloInstruction* op); // Structurizes "array_elements" into an MD array that represents "shape". 
// This is a recursive function, and "dimension_index" indicates the index of -- GitLab From 3110185270e93e0b6a3e82be9199febed1239602 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 11:37:42 -0700 Subject: [PATCH 126/909] Use the new Estimator.get_variable_value() method to get the kmeans cluster centers. PiperOrigin-RevId: 171320755 --- .../contrib/factorization/examples/mnist.py | 2 +- .../python/ops/clustering_ops.py | 8 ++++-- .../factorization/python/ops/kmeans.py | 28 +++---------------- .../learn/python/learn/estimators/kmeans.py | 2 +- 4 files changed, 11 insertions(+), 29 deletions(-) diff --git a/tensorflow/contrib/factorization/examples/mnist.py b/tensorflow/contrib/factorization/examples/mnist.py index 9eefbccd4d..06a62db004 100644 --- a/tensorflow/contrib/factorization/examples/mnist.py +++ b/tensorflow/contrib/factorization/examples/mnist.py @@ -142,7 +142,7 @@ def inference(inp, num_clusters, hidden1_units, hidden2_units): # initial_clusters=tf.contrib.factorization.KMEANS_PLUS_PLUS_INIT, use_mini_batch=True) - (all_scores, _, clustering_scores, _, _, kmeans_init, + (all_scores, _, clustering_scores, _, kmeans_init, kmeans_training_op) = kmeans.training_graph() # Some heuristics to approximately whiten this output. all_scores = (all_scores[0] - 0.5) * 5 diff --git a/tensorflow/contrib/factorization/python/ops/clustering_ops.py b/tensorflow/contrib/factorization/python/ops/clustering_ops.py index e5c9180662..d7320aeb3d 100644 --- a/tensorflow/contrib/factorization/python/ops/clustering_ops.py +++ b/tensorflow/contrib/factorization/python/ops/clustering_ops.py @@ -51,6 +51,9 @@ COSINE_DISTANCE = 'cosine' RANDOM_INIT = 'random' KMEANS_PLUS_PLUS_INIT = 'kmeans_plus_plus' +# The name of the variable holding the cluster centers. Used by the Estimator. 
+CLUSTERS_VAR_NAME = 'clusters' + class KMeans(object): """Creates the graph for k-means clustering.""" @@ -279,7 +282,7 @@ class KMeans(object): """ init_value = array_ops.constant([], dtype=dtypes.float32) cluster_centers = variable_scope.variable( - init_value, name='clusters', validate_shape=False) + init_value, name=CLUSTERS_VAR_NAME, validate_shape=False) cluster_centers_initialized = variable_scope.variable( False, dtype=dtypes.bool, name='initialized') @@ -337,7 +340,6 @@ class KMeans(object): assigned cluster instead. cluster_centers_initialized: scalar indicating whether clusters have been initialized. - cluster_centers_var: a Variable holding the cluster centers. init_op: an op to initialize the clusters. training_op: an op that runs an iteration of training. """ @@ -381,7 +383,7 @@ class KMeans(object): inputs, num_clusters, cluster_idx, cluster_centers_var) return (all_scores, cluster_idx, scores, cluster_centers_initialized, - cluster_centers_var, init_op, training_op) + init_op, training_op) def _mini_batch_sync_updates_op(self, update_in_steps, cluster_centers_var, cluster_centers_updated, total_counts): diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py index 6284768bdd..9a5413fc3f 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans.py @@ -21,12 +21,10 @@ from __future__ import division from __future__ import print_function import time -import numpy as np from tensorflow.contrib.factorization.python.ops import clustering_ops from tensorflow.python.estimator import estimator from tensorflow.python.estimator import model_fn as model_fn_lib -from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -161,8 +159,7 @@ class _ModelFn(object): * `eval_metric_ops`: Maps `SCORE` to 
`loss`. * `predictions`: Maps `ALL_DISTANCES` to the distance from each input point to each cluster center; maps `CLUSTER_INDEX` to the index of - the closest cluster center for each input point; maps `CLUSTERS` to - the cluster centers (which ignores the input points). + the closest cluster center for each input point. """ # input_points is a single Tensor. Therefore, the sharding functionality # in clustering_ops is unused, and some of the values below are lists of a @@ -184,8 +181,8 @@ class _ModelFn(object): # training_op: an op that runs an iteration of training, either an entire # Lloyd iteration or a mini-batch of a Lloyd iteration. Multiple workers # may execute this op, but only after is_initialized becomes True. - (all_distances, model_predictions, losses, is_initialized, - cluster_centers_var, init_op, training_op) = clustering_ops.KMeans( + (all_distances, model_predictions, losses, is_initialized, init_op, + training_op) = clustering_ops.KMeans( inputs=input_points, num_clusters=self._num_clusters, initial_clusters=self._initial_clusters, @@ -215,7 +212,6 @@ class _ModelFn(object): predictions={ KMeansClustering.ALL_DISTANCES: all_distances[0], KMeansClustering.CLUSTER_INDEX: model_predictions[0], - KMeansClustering.CLUSTERS: cluster_centers_var.value(), }, loss=loss, train_op=training_op, @@ -242,9 +238,7 @@ class KMeansClustering(estimator.Estimator): # Keys returned by predict(). # ALL_DISTANCES: The distance from each input point to each cluster center. # CLUSTER_INDEX: The index of the closest cluster center for each input point. - # CLUSTERS: The cluster centers (which ignores the input points). CLUSTER_INDEX = 'cluster_index' - CLUSTERS = 'clusters' ALL_DISTANCES = 'all_distances' def __init__(self, @@ -400,18 +394,4 @@ class KMeansClustering(estimator.Estimator): def cluster_centers(self): """Returns the cluster centers.""" - - # TODO(ccolby): Fix this clunky code once cl/168262087 is submitted. 
- # Discussion: go/estimator-get-variable-value - class RunOnceHook(session_run_hook.SessionRunHook): - """Stops after a single run.""" - - def after_run(self, run_context, run_values): - del run_values # unused - run_context.request_stop() - - result = self.predict( - input_fn=lambda: (constant_op.constant([], shape=[0, 1]), None), - predict_keys=[KMeansClustering.CLUSTERS], - hooks=[RunOnceHook()]) - return np.array([r[KMeansClustering.CLUSTERS] for r in result]) + return self.get_variable_value(clustering_ops.CLUSTERS_VAR_NAME) diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py index b4d9c3fc6f..a92302420f 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py +++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py @@ -106,7 +106,7 @@ def _kmeans_clustering_model_fn(features, labels, mode, params, config): """Model function for KMeansClustering estimator.""" assert labels is None, labels (all_scores, model_predictions, losses, - is_initialized, _, init_op, training_op) = clustering_ops.KMeans( + is_initialized, init_op, training_op) = clustering_ops.KMeans( _parse_tensor_or_dict(features), params.get('num_clusters'), initial_clusters=params.get('training_initial_clusters'), -- GitLab From 5eaefbabce16bffeeb4b19cee9890b1aeccabb09 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 6 Oct 2017 11:44:25 -0700 Subject: [PATCH 127/909] Merge changes from github. END_PUBLIC --- Commit ee0fdc296 authored by Gunhan Gulsoy Committed by TensorFlower Gardener: Add noasan tag to estimator_test PiperOrigin-RevId: 171075499 --- Commit a02116882 authored by Justin Lebar Committed by TensorFlower Gardener: [XLA:CPU] Put the HLO name in IR values that hold the HLO's value. PiperOrigin-RevId: 171075449 --- Commit 89aaac4bc authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Allow Layer.add_update() in Eager mode. 
PiperOrigin-RevId: 171070861 --- Commit 840dcae57 authored by Amit Patankar Committed by gunan: Updating the install sources file with a supported configs table (#13450) * Updating the install sources file with a supported configs page. * Implementing Gunan's suggestions. * Adding GCC string to Linux compiler. * Updating the bazel/cmake column. --- Commit 89df2e336 authored by Igor Saprykin Committed by TensorFlower Gardener: Add the 'is_the_final_export' signal to Exporters. Use them in training. When the training ends, the final export is performed via `Exporter.export()` call. That final export is going to have is_the_final_export parameter being set to true. If `TrainSpec.max_steps` is `None`, then "when training ends" is undefined. We are going to train forever. In that case, `is_the_final_export` is going to be always False. I added a note about it. PiperOrigin-RevId: 171070760 --- Commit 4486b4f69 authored by Akshay Agrawal Committed by TensorFlower Gardener: Make graph_callable compatible with functions that do not return anything PiperOrigin-RevId: 171067061 --- Commit 39565c0cb authored by Martin Wicke Committed by TensorFlower Gardener: Publish train_and_evaluate and associated classes. PiperOrigin-RevId: 171066379 --- Commit 3b4477000 authored by Saurabh Saxena Committed by TensorFlower Gardener: Make VariantTensorData::tensors_size() const. PiperOrigin-RevId: 171063397 --- Commit 53cc63a2d authored by Dhananjay Nakrani Committed by TensorFlower Gardener: [part 1] Add support for int32 & int64 in RandomPoissonOp. This computes int32/int64-precision poisson samples with double precision intermediate calculations (same as it's done for `half`) respectively. part 2 will switch over python calls to new op once forward compatibility period has passed. PiperOrigin-RevId: 171058336 --- Commit 70fc9bf9b authored by Asim Shankar Committed by TensorFlower Gardener: Java: Add support for loading op libraries dynamically. 
This change adds the equivalent of tf.load_op_library in Python to Java. (https://github.com/tensorflow/tensorflow/commit/5c7f9e316d8c7735308a217310350d416d7498cc was required to make this possible) Though, TensorFlow.loadLibrary() is likely to fail on Windows as symbols required by custom op libraries (those exported by the tensorflow_framework library) are not exported by the monolithic JNI library yet. This should help with #10454 and #13476 PiperOrigin-RevId: 171054707 --- Commit e7c53698e authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Internal cleanup PiperOrigin-RevId: 171053770 --- Commit cc8ee6c0f authored by Alexandre Passos Committed by TensorFlower Gardener: Fast path for tf.conj when it should be pass-through. PiperOrigin-RevId: 171053662 --- Commit c41dbc3c1 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Adding TF Boosted trees regression example on boston dataset, minor fix for mnist example. PiperOrigin-RevId: 171052367 --- Commit d66e77f7c authored by Mustafa Ispir Committed by TensorFlower Gardener: Added get variable utils to tf.estimator.Estimator. PiperOrigin-RevId: 171052121 --- Commit 083bd5dde authored by Asim Shankar Committed by TensorFlower Gardener: Java: Add support for loading op libraries dynamically. This change adds the equivalent of tf.load_op_library in Python to Java. (https://github.com/tensorflow/tensorflow/commit/5c7f9e316d8c7735308a217310350d416d7498cc was required to make this possible) Though, TensorFlow.loadLibrary() is likely to fail on Windows as symbols required by custom op libraries (those exported by the tensorflow_framework library) are not exported by the monolithic JNI library yet. This should help with #10454 and #13476 PiperOrigin-RevId: 171054707 --- Commit 2fe6cf285 authored by A. 
Unique TensorFlower Committed by TensorFlower Gardener: Internal cleanup PiperOrigin-RevId: 171053770 --- Commit 15155493b authored by Alexandre Passos Committed by TensorFlower Gardener: Fast path for tf.conj when it should be pass-through. PiperOrigin-RevId: 171053662 --- Commit 6c954d0b3 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Adding TF Boosted trees regression example on boston dataset, minor fix for mnist example. PiperOrigin-RevId: 171052367 --- Commit ad69076eb authored by Mustafa Ispir Committed by TensorFlower Gardener: Added get variable utils to tf.estimator.Estimator. PiperOrigin-RevId: 171052121 --- Commit 3cf41b2ed authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Test save/restore variable from graph_callable. PiperOrigin-RevId: 171051237 --- Commit cf17ec96e authored by Yangzihao Wang Committed by TensorFlower Gardener: Add V2 versions of output window size computation functions for convolution. These V2 versions take arbitrary dilation rates. In preparation for the support of native cudnn dilated convolution. PiperOrigin-RevId: 171048878 --- Commit 491584ff4 authored by Asim Shankar Committed by TensorFlower Gardener: eager: Always run dataset iterator operations on CPU. It has no kernels for other devices. With an explicit "tf.device()" before invoking the kernel we ensure that Iterator.next() functions even when placed inside a: with tf.device("/device:GPU:0") PiperOrigin-RevId: 171048558 --- Commit 3b354016e authored by Igor Saprykin Committed by TensorFlower Gardener: Rename SavedModelExporter to LatestExporter. PiperOrigin-RevId: 171048345 --- Commit 943c6d7af authored by Jianwei Xie Committed by TensorFlower Gardener: errors out if the evaluator has task id > 0. 
PiperOrigin-RevId: 171047652 --- Commit 8c9ef4466 authored by Mark Heffernan Committed by TensorFlower Gardener: Expand set of 64-bit type tests in LocalClientExecuteTest.ShapeBufferToLiteralConversion64bit and factor out into their own test. PiperOrigin-RevId: 171043047 --- Commit cc521eb06 authored by Benoit Steiner Committed by TensorFlower Gardener: Place all the nodes created by the trivial_test_graph_input_yielder PiperOrigin-RevId: 171045878 --- Commit 9b9301240 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: [XLA:CPU] Factor out parallel task assignment from cpu parallelization prep (no functional changes). PiperOrigin-RevId: 171045137 --- Commit 558d878d9 authored by Allen Lavoie Committed by TensorFlower Gardener: TFTS: Move normalization to the base class, start using it for state space models Previously, state space models adjusted their priors based on the data (e.g. setting initial variances to match sample variance) but did not normalize the data itself. When the data has a rather extreme scale, this runs into precision issues. After this CL, state space models will first normalize, then use adjusted statistics on top of that normalization to estimate initial observation/transition noise. Also fixes an issue where start-of-series statistics were incorrect for the first batch (which only shows up with large input scales). PiperOrigin-RevId: 171044863 --- Commit 266f77156 authored by Mark Heffernan Committed by TensorFlower Gardener: Expand set of 64-bit type tests in LocalClientExecuteTest.ShapeBufferToLiteralConversion64bit and factor out into their own test.
PiperOrigin-RevId: 171043047 --- Commit c9915d1a2 authored by Shanqing Cai Committed by TensorFlower Gardener: [tf-signal] Fix pip tests by including test_util in signal_py PiperOrigin-RevId: 171042732 --- Commit f8550f4e9 authored by Mark Heffernan Committed by TensorFlower Gardener: Expand set of 64-bit type tests in LocalClientExecuteTest.ShapeBufferToLiteralConversion64bit and factor out into their own test. PiperOrigin-RevId: 171043047 --- Commit 87dc532cd authored by Shanqing Cai Committed by TensorFlower Gardener: [tf-signal] Fix pip tests by including test_util in signal_py PiperOrigin-RevId: 171042732 --- Commit 0578dd65e authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Add more debugging output for XLA send/recv. PiperOrigin-RevId: 171041978 --- Commit 23992bb09 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Several minor documentation fixes. PiperOrigin-RevId: 171038610 --- Commit af14ed3f3 authored by Jianwei Xie Committed by TensorFlower Gardener: Some docstring twists and argument validations. PiperOrigin-RevId: 171037949 --- Commit 6b90a65f6 authored by Mark Heffernan Committed by TensorFlower Gardener: Remove "hybrid" HloModuleConfig option. The option was used to generate executables which only generated the array values of tuple-shaped outputs, not the tuple index tables.. With cl/170133015, ShapedBuffers which hold the computation output now have materialized tuples with these index tables so this option is no longer desired or necessary. No functional change. Just cleanup. PiperOrigin-RevId: 171035738 --- Commit 41a0264ab authored by Mustafa Ispir Committed by TensorFlower Gardener: Added utilities to make global step reading deterministic. Used them in Estimator. Enabled/Fixed some tests. 
PiperOrigin-RevId: 171035291 --- Commit 9d7843c0a authored by Skye Wanderman-Milne Committed by TensorFlower Gardener: Add optional unused_input_map_keys output param to ImportGraphDef This is a more general feature than that in the Python importer, which raises an exception if the input map contains unused names. PiperOrigin-RevId: 171029316 --- Commit 4f10a6597 authored by Mark Heffernan Committed by TensorFlower Gardener: Add vlogging of HloModule before and after fusion. PiperOrigin-RevId: 171029054 --- Commit 9e658545a authored by Reed Wanderman-Milne Committed by TensorFlower Gardener: Document what dtype tf.image.resize_images returns. For consistency, tf.image.resize_images now will always return a float32 when method != ResizeMethod.NEAREST_NEIGHBOR. Before, it returned the same dtype as its input if it could be determined statically that the height and width would not be changed. PiperOrigin-RevId: 171028825 --- Commit 4d70239f0 authored by Jianwei Xie Committed by TensorFlower Gardener: Replace the contrib FC with core FC in canned Estimator docstring. PiperOrigin-RevId: 171027602 --- Commit 6a1b867ff authored by Jianwei Xie Committed by TensorFlower Gardener: Adds the docstring with details for tf.estimator.train_and_evaluate PiperOrigin-RevId: 171027527 --- Commit 7209c1602 authored by Peter Hawkins Committed by TensorFlower Gardener: [TF:XLA] Mark IdentityN as CompilationOnly(). PiperOrigin-RevId: 171025171 --- Commit 8e22eb874 authored by FAIJUL Committed by Benoit Steiner: Eigen BiasAdd and BiasAddGrad Fix for NCHW Format. (#13158) --- Commit 7db7a890c authored by Jingyue Wu Committed by TensorFlower Gardener: [Grappler] Move InferOutputShapes to GraphProperties. So it can be used by other optimizers. No functional changes. PiperOrigin-RevId: 171010106 --- Commit 2114fd51e authored by Peter Hawkins Committed by TensorFlower Gardener: [TF:XLA] Improve numerical stability of SoftPlus. PiperOrigin-RevId: 171003559 --- Commit 727d6270f authored by A. 
Unique TensorFlower Committed by TensorFlower Gardener: Fix race condition in TensorForest tree traversal. PiperOrigin-RevId: 170990425 --- Commit d016cb020 authored by Suharsh Sivakumar Committed by TensorFlower Gardener: Fix c++ gradients issue where multiple dependent outputs result in incorrect answer. The issue is that we incorrectly calculate the pending num_expected_backprops for output nodes when one output transitively depends on another. This is because we use output nodes as an indicator of when we need to end our traversal. Instead we should only use output nodes that don't transitively get consumed by other output nodes as end indicators for our traversal. This change implements that fix. Fixes #13190 PiperOrigin-RevId: 170971937 --- Commit 5405f3bd7 authored by gunan Committed by Frank Chen: Fix tf-signal tests on pip packages. (#13483) --- Commit f9f037c1c authored by Eugene Brevdo Committed by TensorFlower Gardener: Bugfix to LSTMBlockCell and friends: clipping is off by default. * Rename broken API argument clip_cell boolean to cell_clip value. * Make default no clipping. PiperOrigin-RevId: 170960975 --- Commit bfaaefa9e authored by Frank Chen Committed by TensorFlower Gardener: Update APIs for TPU Cluster Resolver to remove the custom API definition and instead use a standard definition file stored in GCS. PiperOrigin-RevId: 170960877 --- Commit c31c118a3 authored by Ian Langmore Committed by TensorFlower Gardener: Extend tf.contrib.bijector API to handle some non-injective transforms. AbsoluteValue Bijector added to contrib/distributions/bijectors/ TransformedDistribution updated to handle some non-injective transforms.
PiperOrigin-RevId: 170960054 --- Commit 664dd0859 authored by Frank Chen Committed by TensorFlower Gardener: Disable cluster_function_library_runtime_test on Mac OS as it is currently failing with an Unimplemented error PiperOrigin-RevId: 170958505 --- Commit 6af7ab97a authored by Mahmoud Abuzaina Committed by gunan: MKL-DNN open source integration. (#13135) * MKL-DNN conv and build integration * Adding new files that were mistakenly missing from the PR * Minor change in the pip package build file * Added missing #include * Fixed a linking failure when running the bazel test * Fixing BUILD file format * Using -fopenmp for building mkl_dnn only when running on linux * Fixing build rule attribute value * Removing unnecessary deps from mkl test rule * Removed deps on mkl-dnn when not building with --config=mkl --- Commit 93fa1af76 authored by Akshay Agrawal Committed by TensorFlower Gardener: Make graph_callable, defun tf_decorators PiperOrigin-RevId: 170948777 --- Commit b39525785 authored by Mustafa Ispir Committed by TensorFlower Gardener: Added comment re:behavior of listener in case of multiple saver hooks. PiperOrigin-RevId: 170946536 --- Commit de14fcbb6 authored by Igor Saprykin Committed by TensorFlower Gardener: Support evaluation in `_TrainingExecutor.run_master()`. This CL aims to address the following TODO: # TODO(b/66720832): Once listener API is added into Estimator.train, the # eval and export process should be wrapped as a listener and passed to # _start_distributed_training. The expected behavior should be # 1. The export is invoked after each intermediate evaluation. # 2. The evaluation and export should be invoked correctly at the end of # training. This should be fine if the listener works as intended (it will # send the `after_save` signal for the final ckpt saving). 1. is achieved as follows: a. saving_evaluators are added to the CheckpointSaverHook's listeners inside the Estimator. b. 
MonitoredSession calls after_run() of CheckpointSaverHook, which in turn calls after_save on the listeners. 2. is achieved in a similar way, but when MonitoredSession calls .end() on CheckpointSaverHook. PiperOrigin-RevId: 170945961 --- Commit d4ea993ca authored by Alexandre Passos Committed by TensorFlower Gardener: Removes unnecessary eager-mode call to convert_to_tensor in record_gradient. PiperOrigin-RevId: 170944265 --- Commit add6d2d03 authored by RJ Ryan Committed by TensorFlower Gardener: [tf-signal] Use tf.spectral.dct in mfccs_from_log_mel_spectrograms instead of a private implementation. PiperOrigin-RevId: 170943986 --- Commit b959da92f authored by Jiri Simsa Committed by TensorFlower Gardener: Fixing CPU implementation of parallel_stack for tensors with non-zero rank. PiperOrigin-RevId: 170942814 --- Commit 4cf61262a authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Improve TFGAN documentation. PiperOrigin-RevId: 170940188 --- Commit 0068086b9 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Introduce `tf.data` namespace. PiperOrigin-RevId: 170939033 --- Commit 0c8dbc1fd authored by Alexandre Passos Committed by TensorFlower Gardener: matmul uses shape_tuple internally PiperOrigin-RevId: 170938790 --- Commit ad37fa81f authored by Igor Saprykin Committed by TensorFlower Gardener: Refactor ExportStrategies into Exporters. This design eliminates some indirection. Instead of combining an `export_fn` with `make_export_strategy` call to arrive at an ExportStrategy that is going to call the supplied `export_fn` inside its `export` call with Exporters one just defines the `export` call in an Exporter. PiperOrigin-RevId: 170936640 --- Commit b925f8553 authored by Alexandre Passos Committed by TensorFlower Gardener: Fast-path for EagerTensorBase.dtype PiperOrigin-RevId: 170933005 --- Commit 08e266d9b authored by A. 
Unique TensorFlower Committed by TensorFlower Gardener: Pass activity_regularizer to __init__ instead of using the (now deprecated) property setter. PiperOrigin-RevId: 170932807 --- Commit b002c8b7d authored by Jingyue Wu Committed by TensorFlower Gardener: [Grappler] Fold chains of reshapes. Reshape(Reshape(input, shape1), shape2) is equivalent to Reshape(input, shape2). PiperOrigin-RevId: 170932278 --- Commit 075d1d13b authored by horance Committed by Frank Chen: remove warning for forward decl (#13459) --- Commit 931609fcf authored by Ryohei Kuroki Committed by Frank Chen: Remove unnecessary specification for default kernel name (#13465) --- Commit 94463f521 authored by Akshay Agrawal Committed by TensorFlower Gardener: Preserve target function signature in custom_gradient decorator PiperOrigin-RevId: 170931715 --- Commit 681056636 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Internal change to simplify prediction ops. - it no longer returns predictions_no_dropout, which is mostly for debugging purposes. - as a consequence, MultipleAdditiveTrees::Predict() doesn't return prediction_no_dropout, and it accepts trees_to_include indexes instead of trees_to_drop indexes. PiperOrigin-RevId: 170926422 --- Commit d6e963b82 authored by Asim Shankar Committed by TensorFlower Gardener: SYCL: Fix build breakage introduced in https://github.com/tensorflow/tensorflow/commit/f0e8c545e0196b8b48ce0ad0f116df97d980d1f1 Fixes #13350 PiperOrigin-RevId: 170923862 --- Commit 5123f2971 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Internal cleanup. PiperOrigin-RevId: 170922297 --- Commit d0c76cd18 authored by Igor Saprykin Committed by TensorFlower Gardener: Handle the absence of a fresh eval checkpoint in `run_local`. It is ~unexpected condition for an eval checkpoint to not be available after a train call to the estimator. There is a corner case when it is possible, but that's going to be resolved soon.
This case is handled for continuous (distributed) evaluation differently. Instead of erroring out, we skip evaluation runs. That behavior is captured in the `test_skip_evaluation_due_to_ckpt` test. PiperOrigin-RevId: 170919925 --- Commit 435b31b9f authored by Gunhan Gulsoy Committed by TensorFlower Gardener: BEGIN_PUBLIC Automated g4 rollback of changelist 170892257 PiperOrigin-RevId: 171321707 --- README.md | 6 +- .../compiler/jit/kernels/xla_launch_op.cc | 15 + .../xla/service/gpu/convolution_thunk.cc | 51 +- .../xla/service/gpu/convolution_thunk.h | 4 +- .../android/TensorFlowInferenceInterface.java | 23 +- .../quantiles/weighted_quantiles_summary.h | 2 +- .../kernel_tests/batch_dataset_op_test.py | 40 ++ .../contrib/data/python/ops/dataset_ops.py | 2 +- tensorflow/contrib/deprecated/__init__.py | 2 +- .../contrib/ffmpeg/default/ffmpeg_lib.cc | 10 +- .../framework/python/framework/tensor_util.py | 6 +- .../fused_conv2d_bias_activation_op.cc | 57 +-- tensorflow/contrib/memory_stats/__init__.py | 2 + .../memory_stats/kernels/memory_stats_ops.cc | 22 + .../memory_stats/ops/memory_stats_ops.cc | 4 + .../kernel_tests/memory_stats_ops_test.py | 22 +- .../python/ops/memory_stats_ops.py | 5 + .../resampler/kernels/resampler_ops.cc | 2 +- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 10 +- .../contrib/seq2seq/python/ops/helper.py | 2 +- tensorflow/contrib/signal/BUILD | 1 + .../python/slim/data/tfexample_decoder.py | 5 +- .../slim/data/tfexample_decoder_test.py | 45 +- .../timeseries/python/timeseries/BUILD | 48 +- .../timeseries/python/timeseries/ar_model.py | 2 +- .../python/timeseries/estimators.py | 7 +- .../timeseries/python/timeseries/head.py | 375 +++++++++++++++ .../timeseries/python/timeseries/head_test.py | 267 +++++++++++ .../python/timeseries/model_utils.py | 319 ------------- .../python/timeseries/model_utils_test.py | 236 --------- .../python/timeseries/saved_model_utils.py | 3 +- tensorflow/core/BUILD | 22 +- tensorflow/core/graph/mkl_graph_util.h | 128 
+++++ tensorflow/core/graph/mkl_layout_pass.cc | 2 +- tensorflow/core/graph/mkl_layout_pass_test.cc | 2 +- .../core/graph/mkl_tfconversion_pass.cc | 2 +- .../core/graph/mkl_tfconversion_pass_test.cc | 2 +- tensorflow/core/kernels/BUILD | 34 +- tensorflow/core/kernels/bias_op.cc | 159 ++++--- .../core/kernels/conv_grad_filter_ops.cc | 55 +-- .../core/kernels/conv_grad_input_ops.cc | 53 +-- tensorflow/core/kernels/conv_grad_ops_3d.cc | 109 ++--- tensorflow/core/kernels/conv_ops.cc | 51 +- tensorflow/core/kernels/conv_ops_3d.cc | 51 +- tensorflow/core/kernels/decode_csv_op.cc | 19 +- .../dense_to_sparse_batch_dataset_op.cc | 45 +- .../core/kernels/mkl_conv_grad_filter_ops.cc | 181 +++++++ .../core/kernels/mkl_conv_grad_input_ops.cc | 190 +++++++- tensorflow/core/kernels/mkl_conv_ops.cc | 213 +++++++++ tensorflow/core/kernels/mkl_conv_ops.h | 308 ++++++++++++ .../core/kernels/mkl_cwise_ops_common.cc | 2 +- tensorflow/core/lib/strings/numbers.cc | 2 +- tensorflow/core/ops/dataset_ops.cc | 3 +- tensorflow/core/ops/nn_ops.cc | 84 ++-- tensorflow/core/ops/nn_ops_test.cc | 49 -- tensorflow/core/ops/parsing_ops.cc | 2 + tensorflow/core/util/mkl_util.h | 401 ++++++++++++---- .../docs_src/install/install_sources.md | 38 ++ .../org/tensorflow/demo/SpeechActivity.java | 8 +- .../tutorials/word2vec/word2vec_basic.py | 2 +- .../go/example_inception_inference_test.go | 2 +- tensorflow/go/tensor.go | 48 +- tensorflow/go/tensor_test.go | 10 + .../java/src/gen/perl/tftypes-runall.pl | 2 +- tensorflow/java/src/gen/perl/tftypes.pl | 102 ++-- .../java/src/gen/resources/Tensors.java.tmpl | 31 ++ tensorflow/java/src/gen/resources/tftypes.csv | 42 +- .../main/java/org/tensorflow/DataType.java | 39 +- .../src/main/java/org/tensorflow/Graph.java | 7 +- .../src/main/java/org/tensorflow/Input.java | 4 +- .../java/org/tensorflow/NativeLibrary.java | 9 +- .../src/main/java/org/tensorflow/Operand.java | 12 +- .../main/java/org/tensorflow/Operation.java | 18 +- 
.../java/org/tensorflow/OperationBuilder.java | 14 +- .../src/main/java/org/tensorflow/Output.java | 12 +- .../java/org/tensorflow/SavedModelBundle.java | 5 +- .../src/main/java/org/tensorflow/Session.java | 34 +- .../src/main/java/org/tensorflow/Tensor.java | 241 +++++++--- .../src/main/java/org/tensorflow/Tensors.java | 447 ++++++++++++++++++ .../org/tensorflow/examples/LabelImage.java | 79 ++-- .../main/java/org/tensorflow/op/Operands.java | 8 +- .../java/org/tensorflow/op/core/Constant.java | 34 +- .../main/java/org/tensorflow/types/UInt8.java | 21 + .../org/tensorflow/types/package-info.java | 16 +- .../test/java/org/tensorflow/GraphTest.java | 1 - .../org/tensorflow/OperationBuilderTest.java | 25 +- .../java/org/tensorflow/OperationTest.java | 19 +- .../test/java/org/tensorflow/SessionTest.java | 41 +- .../test/java/org/tensorflow/ShapeTest.java | 2 +- .../test/java/org/tensorflow/TensorTest.java | 99 ++-- .../test/java/org/tensorflow/TestUtil.java | 24 +- .../java/org/tensorflow/op/OperandsTest.java | 7 +- .../org/tensorflow/op/PrimitiveOpTest.java | 2 +- .../java/org/tensorflow/op/ScopeTest.java | 128 +++-- .../org/tensorflow/op/core/ConstantTest.java | 22 +- tensorflow/python/debug/lib/debug_graphs.py | 4 +- .../inputs/queues/feeding_functions.py | 2 +- .../keras/_impl/keras/engine/topology_test.py | 2 +- .../kernel_tests/conv2d_transpose_test.py | 14 + .../python/kernel_tests/decode_csv_op_test.py | 11 + .../kernel_tests/summary_tensor_op_test.py | 2 +- tensorflow/python/ops/hidden_ops.txt | 1 + tensorflow/python/ops/parsing_ops.py | 39 ++ tensorflow/stream_executor/cuda/cuda_dnn.cc | 90 ++-- tensorflow/stream_executor/cuda/cuda_dnn.h | 12 +- tensorflow/stream_executor/dnn.cc | 12 +- tensorflow/stream_executor/dnn.h | 12 +- tensorflow/stream_executor/platform.h | 2 +- tensorflow/stream_executor/stream.h | 2 +- .../stream_executor/stream_executor_pimpl.cc | 22 +- .../stream_executor/stream_executor_pimpl.h | 9 +- tensorflow/tensorflow.bzl | 35 +- 
tensorflow/tools/api/golden/tensorflow.pbtxt | 2 +- .../tools/ci_build/install/install_golang.sh | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 4 +- .../tools/docker/jupyter_notebook_config.py | 1 - tensorflow/tools/docs/parser.py | 4 +- .../gen_proto_text_functions_lib_test.cc | 9 +- tensorflow/workspace.bzl | 17 +- third_party/gpus/cuda_configure.bzl | 2 +- third_party/mkl_dnn/BUILD | 1 + third_party/mkl_dnn/mkldnn.BUILD | 25 + 122 files changed, 4102 insertions(+), 1655 deletions(-) create mode 100644 tensorflow/contrib/timeseries/python/timeseries/head.py create mode 100644 tensorflow/contrib/timeseries/python/timeseries/head_test.py create mode 100644 tensorflow/core/graph/mkl_graph_util.h create mode 100644 tensorflow/core/kernels/mkl_conv_ops.h create mode 100644 tensorflow/java/src/gen/resources/Tensors.java.tmpl create mode 100644 tensorflow/java/src/main/java/org/tensorflow/Tensors.java create mode 100644 tensorflow/java/src/main/java/org/tensorflow/types/UInt8.java create mode 100644 third_party/mkl_dnn/BUILD create mode 100644 third_party/mkl_dnn/mkldnn.BUILD diff --git a/README.md b/README.md index 4cc53096e0..6339c57c95 100644 --- a/README.md +++ b/README.md @@ -48,9 +48,9 @@ GPU packages on all platforms will arrive soon! 
* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/)) * Linux GPU: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/42/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp34-cp34m-linux_x86_64.whl) ([build 
history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/)) * Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/)) -* Windows CPU-only: [Python 3.5 64-bit](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp35-cp35m-win_amd64.whl) ([build history](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/)) / [Python 3.6 64-bit](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp36-cp36m-win_amd64.whl) ([build 
history](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/)) -* Windows GPU: Coming soon! -* Android: [demo APK](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk), [native libs](http://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/native/) +* Windows CPU-only: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp36-cp36m-win_amd64.whl) ([build history](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/)) +* Windows GPU: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp36-cp36m-win_amd64.whl) ([build history](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/)) +* Android: [demo APK](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk), [native libs](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/native/) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-android/)) #### *Try your first 
TensorFlow program* diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc index 1b5dd558dd..27c5da08c1 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc @@ -52,6 +52,11 @@ class XlaAllocator : public xla::DeviceMemoryAllocator { bool retry_on_failure) override; Status Deallocate(int device_ordinal, gpu::DeviceMemoryBase* mem) override; + // Register an Tensor (input or resource variable) with the allocator. If + // the operation returns an alias to one of its inputs, then the allocator + // needs to be able to handle it. + Status RegisterArgument(const Tensor* t); + // Makes 'tensor' a wrapper around the data buffer at 'ptr'. The buffer is // interpreted as having data type 'dtype' and shape 'shape'. Status MakeTensorFromBuffer(gpu::DeviceMemoryBase buffer, DataType dtype, @@ -103,6 +108,14 @@ xla::StatusOr XlaAllocator::Allocate( return gpu::DeviceMemoryBase(data, size); } +Status XlaAllocator::RegisterArgument(const Tensor* t) { + void* data = + reinterpret_cast(const_cast(t->tensor_data().data())); + TF_RET_CHECK(data != nullptr); + tensors_[data] = *t; + return Status::OK(); +} + Status XlaAllocator::Deallocate(int device_ordinal, gpu::DeviceMemoryBase* mem) { if (mem->opaque() != nullptr) { @@ -284,6 +297,8 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { shape, client->platform(), client->default_device_ordinal(), dmem) .ConsumeValueOrDie(); arg_ptrs[i] = arg_buffers[i].get(); + + OP_REQUIRES_OK(ctx, xla_allocator.RegisterArgument(t)); } // Make the final parameter point at local_runtime_context. 
diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index 89145a9038..7dd242425c 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -256,9 +256,9 @@ tensorflow::Status ConvolutionThunk::Convolve( algorithm_config.algorithm_no_scratch().algo_id()); } -std::vector ConvolutionThunk::GetAlgorithms( +std::vector ConvolutionThunk::GetAlgorithms( se::StreamExecutor* stream_exec) const { - std::vector algorithms; + std::vector algorithms; // TODO(yangzihao): Currently disable the use of winograd nonfused in XLA // by default. Should send in conv parameters and enable it when // ShouldIncludeWinogradNonfusedAlgo() returns true. @@ -297,32 +297,27 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( se::dnn::ProfileResult best_result; se::dnn::ProfileResult best_result_without_scratch; - std::vector algorithms = - GetAlgorithms(stream->parent()); - for (bool use_tensor_ops : {false, true}) { - for (auto algo_index : algorithms) { - AlgorithmDesc algorithm(algo_index, use_tensor_ops); - ConvolveScratchAllocator scratch_allocator( - buffer_allocations.device_ordinal(), - buffer_allocations.memory_allocator()); - se::dnn::ProfileResult profile_result; - bool launch_ok = - Convolve(input_descriptor, input_data, filter_descriptor, - filter_data, output_descriptor, output_data, - convolution_descriptor, - se::dnn::AlgorithmConfig(algorithm, algorithm), stream, - &scratch_allocator, &profile_result) - .ok(); - if (launch_ok && profile_result.is_valid()) { - if (profile_result.elapsed_time_in_ms() < - best_result.elapsed_time_in_ms()) { - best_result = profile_result; - } - if (scratch_allocator.TotalAllocatedBytes() == 0 && - profile_result.elapsed_time_in_ms() < - best_result_without_scratch.elapsed_time_in_ms()) { - best_result_without_scratch = profile_result; - } + std::vector algorithms = 
GetAlgorithms(stream->parent()); + for (auto algorithm : algorithms) { + ConvolveScratchAllocator scratch_allocator( + buffer_allocations.device_ordinal(), + buffer_allocations.memory_allocator()); + se::dnn::ProfileResult profile_result; + bool launch_ok = + Convolve(input_descriptor, input_data, filter_descriptor, filter_data, + output_descriptor, output_data, convolution_descriptor, + se::dnn::AlgorithmConfig(algorithm, algorithm), stream, + &scratch_allocator, &profile_result) + .ok(); + if (launch_ok && profile_result.is_valid()) { + if (profile_result.elapsed_time_in_ms() < + best_result.elapsed_time_in_ms()) { + best_result = profile_result; + } + if (scratch_allocator.TotalAllocatedBytes() == 0 && + profile_result.elapsed_time_in_ms() < + best_result_without_scratch.elapsed_time_in_ms()) { + best_result_without_scratch = profile_result; } } } diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h index 509719c1fe..13432301b2 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h @@ -115,9 +115,7 @@ class ConvolutionThunk : public Thunk { perftools::gputools::dnn::ProfileResult* profile_result); // Returns the convolve algorithms that can be used for this ConvolutionThunk. - // TODO(nluehr) GetAlgorithms should return AlgorithmDesc including both - // tensor-op and non-tensor-op variants. 
- std::vector GetAlgorithms( + std::vector GetAlgorithms( perftools::gputools::StreamExecutor* stream_exec) const; // Fastest cuDNN convolution algorithm for this thunk learned from diff --git a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java index 395dd6c5d2..80e03f2036 100644 --- a/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java +++ b/tensorflow/contrib/android/java/org/tensorflow/contrib/android/TensorFlowInferenceInterface.java @@ -31,12 +31,13 @@ import java.nio.IntBuffer; import java.nio.LongBuffer; import java.util.ArrayList; import java.util.List; -import org.tensorflow.DataType; import org.tensorflow.Graph; import org.tensorflow.Operation; import org.tensorflow.Session; import org.tensorflow.Tensor; import org.tensorflow.TensorFlow; +import org.tensorflow.Tensors; +import org.tensorflow.types.UInt8; /** * Wrapper over the TensorFlow API ({@link Graph}, {@link Session}) providing a smaller API surface @@ -328,7 +329,7 @@ public class TensorFlowInferenceInterface { * destination has capacity, the copy is truncated. */ public void feed(String inputName, byte[] src, long... dims) { - addFeed(inputName, Tensor.create(DataType.UINT8, dims, ByteBuffer.wrap(src))); + addFeed(inputName, Tensor.create(UInt8.class, dims, ByteBuffer.wrap(src))); } /** @@ -337,7 +338,7 @@ public class TensorFlowInferenceInterface { * a Java {@code String} (which is a sequence of characters). */ public void feedString(String inputName, byte[] src) { - addFeed(inputName, Tensor.create(src)); + addFeed(inputName, Tensors.create(src)); } /** @@ -346,7 +347,7 @@ public class TensorFlowInferenceInterface { * arbitrary sequence of bytes, not a Java {@code String} (which is a sequence of characters). 
*/ public void feedString(String inputName, byte[][] src) { - addFeed(inputName, Tensor.create(src)); + addFeed(inputName, Tensors.create(src)); } // Methods for taking a native Tensor and filling it with src from Java native IO buffers. @@ -403,7 +404,7 @@ public class TensorFlowInferenceInterface { * destination has capacity, the copy is truncated. */ public void feed(String inputName, ByteBuffer src, long... dims) { - addFeed(inputName, Tensor.create(DataType.UINT8, dims, src)); + addFeed(inputName, Tensor.create(UInt8.class, dims, src)); } /** @@ -544,7 +545,7 @@ public class TensorFlowInferenceInterface { "Model load took " + (endMs - startMs) + "ms, TensorFlow version: " + TensorFlow.version()); } - private void addFeed(String inputName, Tensor t) { + private void addFeed(String inputName, Tensor t) { // The string format accepted by TensorFlowInferenceInterface is node_name[:output_index]. TensorId tid = TensorId.parse(inputName); runner.feed(tid.name, tid.outputIndex, t); @@ -578,7 +579,7 @@ public class TensorFlowInferenceInterface { } } - private Tensor getTensor(String outputName) { + private Tensor getTensor(String outputName) { int i = 0; for (String n : fetchNames) { if (n.equals(outputName)) { @@ -591,7 +592,7 @@ public class TensorFlowInferenceInterface { } private void closeFeeds() { - for (Tensor t : feedTensors) { + for (Tensor t : feedTensors) { t.close(); } feedTensors.clear(); @@ -599,7 +600,7 @@ public class TensorFlowInferenceInterface { } private void closeFetches() { - for (Tensor t : fetchTensors) { + for (Tensor t : fetchTensors) { t.close(); } fetchTensors.clear(); @@ -614,9 +615,9 @@ public class TensorFlowInferenceInterface { // State reset on every call to run. 
private Session.Runner runner; private List feedNames = new ArrayList(); - private List feedTensors = new ArrayList(); + private List> feedTensors = new ArrayList>(); private List fetchNames = new ArrayList(); - private List fetchTensors = new ArrayList(); + private List> fetchTensors = new ArrayList>(); // Mutable state. private RunStats runStats; diff --git a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h index dad3b4e10d..c329c6d4f7 100644 --- a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h +++ b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h @@ -36,7 +36,7 @@ class WeightedQuantilesSummary { struct SummaryEntry { SummaryEntry(const ValueType& v, const WeightType& w, const WeightType& min, const WeightType& max) { - // Explicitely initialize all of memory (including padding from memory + // Explicitly initialize all of memory (including padding from memory // alignment) to allow the struct to be msan-resistant "plain old data". 
// // POD = http://en.cppreference.com/w/cpp/concept/PODType diff --git a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py index 813c64d141..91f100e0f0 100644 --- a/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/batch_dataset_op_test.py @@ -253,6 +253,46 @@ class BatchDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testDenseToSparseBatchDatasetWithUnknownShape(self): + components = np.random.randint(5, size=(40,)).astype(np.int32) + iterator = (dataset_ops.Dataset.from_tensor_slices(components) + .map(lambda x: array_ops.fill([x, x], x)).dense_to_sparse_batch( + 4, [5, -1]).make_initializable_iterator()) + init_op = iterator.initializer + get_next = sparse_tensor.SparseTensor(*iterator.get_next()) + + with self.test_session() as sess: + sess.run(init_op) + + for start in range(0, len(components), 4): + results = sess.run(get_next) + self.assertAllEqual( + [[i, j, z] for i, c in enumerate(components[start:start+4]) + for j in range(c) for z in range(c)], results.indices) + self.assertAllEqual( + [c for c in components[start:start+4] + for _ in range(c) for _ in range(c)], + results.values) + self.assertAllEqual( + [min(4, len(components) - start), + 5, + np.max(components[start:start+4])], + results.dense_shape) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + def testDenseToSparseBatchDatasetWithInvalidShape(self): + input_tensor = array_ops.constant([[1]]) + iterator = (dataset_ops.Dataset.from_tensors(input_tensor) + .dense_to_sparse_batch(4, [-2]).make_initializable_iterator()) + init_op = iterator.initializer + + with self.test_session() as sess: + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Dimension -2 must be >= -1"): + sess.run(init_op) + def testDenseToSparseBatchDatasetShapeErrors(self): 
input_tensor = array_ops.placeholder(dtypes.int32) iterator = (dataset_ops.Dataset.from_tensors(input_tensor).apply( diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index ff89c47a2e..b74dcd3be2 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -653,7 +653,7 @@ class Dataset(dataset_ops.Dataset): ```python # Preprocess 4 files concurrently, and interleave blocks of 16 records from # each file. - filenames = ["/var/data/file1.txt", "/var/data/file2.txt", ..."] + filenames = ["/var/data/file1.txt", "/var/data/file2.txt", ...] dataset = (Dataset.from_tensor_slices(filenames) .interleave(lambda x: TextLineDataset(x).map(parse_fn, num_parallel_calls=1), diff --git a/tensorflow/contrib/deprecated/__init__.py b/tensorflow/contrib/deprecated/__init__.py index bfea8445a7..7aff045de3 100644 --- a/tensorflow/contrib/deprecated/__init__.py +++ b/tensorflow/contrib/deprecated/__init__.py @@ -91,7 +91,7 @@ from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,line-too-long +# pylint: disable=unused-import from tensorflow.python.ops.logging_ops import audio_summary from tensorflow.python.ops.logging_ops import histogram_summary from tensorflow.python.ops.logging_ops import image_summary diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc index 888f5c38a2..b417a70b6e 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc @@ -208,7 +208,15 @@ string GetTempFilename(const string& extension) { } struct stat statbuf; if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) { - return io::JoinPath(dir, StrCat("tmp_file_", getpid(), ".", extension)); + string tmp_filepath = + io::JoinPath(dir, StrCat("tmp_file_XXXXXX", ".", extension)); + int fd = mkstemps(&tmp_filepath[0], 
extension.length() + 1); + if (fd < 0) { + LOG(FATAL) << "Failed to create temp file."; + } else { + close(fd); + return tmp_filepath; + } } } LOG(FATAL) << "No temp directory found."; diff --git a/tensorflow/contrib/framework/python/framework/tensor_util.py b/tensorflow/contrib/framework/python/framework/tensor_util.py index e595e4d90b..92a2a4ff2d 100644 --- a/tensorflow/contrib/framework/python/framework/tensor_util.py +++ b/tensorflow/contrib/framework/python/framework/tensor_util.py @@ -78,9 +78,9 @@ def reduce_sum_n(tensors, name=None): return math_ops.add_n(tensors, name=name_scope) @deprecated(None, - "Please switch to tf.confusion_matrix.remove_squeezable_dimensions. Note " - "that order of the inputs and ouputs of labels and predictions have also " - "been switched.") + 'Please switch to tf.confusion_matrix.remove_squeezable_dimensions.' + 'Note that order of the inputs and outputs of labels and ' + 'predictions have also been switched.') def remove_squeezable_dimensions(predictions, labels, name=None): """Squeeze last dim if ranks of `predictions` and `labels` differ by 1. 
diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc index 9275d5a22b..256f200868 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc @@ -493,42 +493,37 @@ void LaunchFusedConv2DBiasActivationOp:: dnn::AlgorithmConfig algorithm_config; if (cudnn_use_autotune && !AutoTuneConvBiasActivation::GetInstance()->Find( fused_conv_parameters, &algorithm_config)) { - std::vector algorithms; + std::vector algorithms; CHECK(stream->parent()->GetConvolveAlgorithms( fused_conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); dnn::ProfileResult best_result; dnn::ProfileResult best_result_no_scratch; - // TODO(benbarsdell): Ideally this should not attempt using tensor op math - // if it's not enabled. - for (bool use_tensor_ops : {false, true}) { - for (auto algo_index : algorithms) { - // TODO(zhengxq): profile each algorithm multiple times to better - // accuracy. 
- dnn::AlgorithmDesc profile_algorithm(algo_index, use_tensor_ops); - CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx); - dnn::ProfileResult profile_result; - bool cudnn_launch_status = - stream - ->ThenFusedConvolveWithAlgorithm( - conv_input_desc, conv_input_ptr, conv_input_scale, - filter_desc, filter_ptr, conv_desc, side_input_ptr, - side_input_scale, bias_desc, bias_ptr, - dnn::ActivationMode::kRelu, output_desc, &output_ptr, - &scratch_allocator, dnn::AlgorithmConfig(profile_algorithm), - &profile_result) - .ok(); - if (cudnn_launch_status) { - if (profile_result.is_valid()) { - if (profile_result.elapsed_time_in_ms() < - best_result.elapsed_time_in_ms()) { - best_result = profile_result; - } - if (scratch_allocator.TotalByteSize() == 0 && - profile_result.elapsed_time_in_ms() < - best_result_no_scratch.elapsed_time_in_ms()) { - best_result_no_scratch = profile_result; - } + for (auto profile_algorithm : algorithms) { + // TODO(zhengxq): profile each algorithm multiple times to better + // accuracy. 
+ CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx); + dnn::ProfileResult profile_result; + bool cudnn_launch_status = + stream + ->ThenFusedConvolveWithAlgorithm( + conv_input_desc, conv_input_ptr, conv_input_scale, + filter_desc, filter_ptr, conv_desc, side_input_ptr, + side_input_scale, bias_desc, bias_ptr, + dnn::ActivationMode::kRelu, output_desc, &output_ptr, + &scratch_allocator, dnn::AlgorithmConfig(profile_algorithm), + &profile_result) + .ok(); + if (cudnn_launch_status) { + if (profile_result.is_valid()) { + if (profile_result.elapsed_time_in_ms() < + best_result.elapsed_time_in_ms()) { + best_result = profile_result; + } + if (scratch_allocator.TotalByteSize() == 0 && + profile_result.elapsed_time_in_ms() < + best_result_no_scratch.elapsed_time_in_ms()) { + best_result_no_scratch = profile_result; } } } diff --git a/tensorflow/contrib/memory_stats/__init__.py b/tensorflow/contrib/memory_stats/__init__.py index a2b2b65692..a32302c854 100644 --- a/tensorflow/contrib/memory_stats/__init__.py +++ b/tensorflow/contrib/memory_stats/__init__.py @@ -14,10 +14,12 @@ # ============================================================================== """Ops for memory statistics. +@@BytesInUse @@BytesLimit @@MaxBytesInUse """ +from tensorflow.contrib.memory_stats.python.ops.memory_stats_ops import BytesInUse from tensorflow.contrib.memory_stats.python.ops.memory_stats_ops import BytesLimit from tensorflow.contrib.memory_stats.python.ops.memory_stats_ops import MaxBytesInUse diff --git a/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc b/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc index 3b88535dce..7e2e96e160 100644 --- a/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc +++ b/tensorflow/contrib/memory_stats/kernels/memory_stats_ops.cc @@ -40,6 +40,28 @@ class MemoryStatsOp : public OpKernel { const AllocatorStats& allocator_stats) const = 0; }; +// Op that measures current memory in bytes. 
+class BytesInUseOp : public MemoryStatsOp { + public: + explicit BytesInUseOp(OpKernelConstruction* context) + : MemoryStatsOp(context) {} + + private: + int64 ExtractAllocatorStats( + const AllocatorStats& allocator_stats) const override { + return allocator_stats.bytes_in_use; + } +}; + +// Register this op on GPU only, see comment for MaxBytesInUse for reason +REGISTER_KERNEL_BUILDER(Name("BytesInUse").Device(DEVICE_GPU).HostMemory("out"), + BytesInUseOp); + +#ifdef TENSORFLOW_USE_SYCL +REGISTER_KERNEL_BUILDER( + Name("BytesInUse").Device(DEVICE_SYCL).HostMemory("out"), MaxBytesInUseOp); +#endif // TENSORFLOW_USE_SYCL + // Op that measures the total memory (in bytes) of a device. class BytesLimitOp : public MemoryStatsOp { public: diff --git a/tensorflow/contrib/memory_stats/ops/memory_stats_ops.cc b/tensorflow/contrib/memory_stats/ops/memory_stats_ops.cc index 08859c8613..42020cf7f6 100644 --- a/tensorflow/contrib/memory_stats/ops/memory_stats_ops.cc +++ b/tensorflow/contrib/memory_stats/ops/memory_stats_ops.cc @@ -17,6 +17,10 @@ limitations under the License. 
namespace tensorflow { +REGISTER_OP("BytesInUse") + .Output("out: int64") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape); REGISTER_OP("BytesLimit") .Output("out: int64") .SetIsStateful() diff --git a/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py b/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py index ec25c032f0..d1b430b803 100644 --- a/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py +++ b/tensorflow/contrib/memory_stats/python/kernel_tests/memory_stats_ops_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.contrib.memory_stats.python.ops import memory_stats_ops from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import math_ops @@ -64,10 +65,29 @@ class MemoryStatsOpsTest(test_util.TensorFlowTestCase): d = math_ops.matmul(c, b) sess.run(d) - max_bytes_in_use = sess.run(memory_stats_ops.MaxBytesInUse()) + max_bytes_in_use_op = memory_stats_ops.MaxBytesInUse() + max_bytes_in_use = sess.run(max_bytes_in_use_op) self.assertGreaterEqual(max_bytes_in_use, matrix_size_in_bytes * 3) self.assertLess(max_bytes_in_use, matrix_size_in_bytes * 4) + # run chain with 2 ops, make sure BytesInUse captures intermediate + # memory usage + a = random_ops.random_uniform(matrix_shape, dtype=dtype) + with ops.control_dependencies([a]): + bytes_in_use_op = memory_stats_ops.BytesInUse() + with ops.control_dependencies([bytes_in_use_op]): + b = random_ops.random_uniform(matrix_shape, dtype=dtype) + + _, bytes_in_use, max_bytes_in_use = sess.run([a, bytes_in_use_op, + max_bytes_in_use_op]) + + # intermediate result allocates 1 matrix, max usage is at least 2 + self.assertGreaterEqual(bytes_in_use, matrix_size_in_bytes * 1) + self.assertLess(bytes_in_use, matrix_size_in_bytes * 2) + + # max 
usage is still 3 because it reflects maxium from previous .run call + self.assertGreaterEqual(max_bytes_in_use, matrix_size_in_bytes * 3) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/memory_stats/python/ops/memory_stats_ops.py b/tensorflow/contrib/memory_stats/python/ops/memory_stats_ops.py index d35c6583ed..c0f7788c1c 100644 --- a/tensorflow/contrib/memory_stats/python/ops/memory_stats_ops.py +++ b/tensorflow/contrib/memory_stats/python/ops/memory_stats_ops.py @@ -26,6 +26,11 @@ _memory_stats_ops_so = loader.load_op_library( resource_loader.get_path_to_datafile("_memory_stats_ops.so")) +def BytesInUse(): + """Generates an op that computes the current memory of a device.""" + return gen_memory_stats_ops.bytes_in_use() + + def BytesLimit(): """Generates an op that measures the total memory (in bytes) of a device.""" return gen_memory_stats_ops.bytes_limit() diff --git a/tensorflow/contrib/resampler/kernels/resampler_ops.cc b/tensorflow/contrib/resampler/kernels/resampler_ops.cc index afc8bcd446..7d9ef14cef 100644 --- a/tensorflow/contrib/resampler/kernels/resampler_ops.cc +++ b/tensorflow/contrib/resampler/kernels/resampler_ops.cc @@ -122,7 +122,7 @@ struct Resampler2DFunctor{ }; // Rough estimate of work for each batch entry. // From third_party/tensorflow/core/util/work_sharder.cc we gather that an - // estimate of the cost of each work unit is needed to correclty shard the + // estimate of the cost of each work unit is needed to correctly shard the // workload. Shard assumes each cost unit is 1ns, minimum cost per shard // being 10us. 
const int64 cost = static_cast(num_sampling_points) * diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 1b0327d62b..6702a89d22 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -525,7 +525,7 @@ class GridLSTMCell(rnn_cell_impl.RNNCell): self._state_tuple_type = collections.namedtuple( "GridLSTMStateTuple", state_names.strip(",")) self._state_size = self._state_tuple_type( - *([num_units, num_units] * self._total_blocks)) + *([num_units, num_units] * self._total_blocks)) else: self._state_tuple_type = None self._state_size = num_units * self._total_blocks * 2 @@ -2082,9 +2082,11 @@ def _conv(args, shape_length = len(shapes[0]) for shape in shapes: if len(shape) not in [3,4,5]: - raise ValueError("Conv Linear expects 3D, 4D or 5D arguments: %s" % str(shapes)) + raise ValueError("Conv Linear expects 3D, 4D " + "or 5D arguments: %s" % str(shapes)) if len(shape) != len(shapes[0]): - raise ValueError("Conv Linear expects all args to be of same Dimensiton: %s" % str(shapes)) + raise ValueError("Conv Linear expects all args " + "to be of same Dimension: %s" % str(shapes)) else: total_arg_size_depth += shape[-1] dtype = [a.dtype for a in args][0] @@ -2102,7 +2104,7 @@ def _conv(args, # Now the computation. 
kernel = vs.get_variable( - "kernel", + "kernel", filter_size + [total_arg_size_depth, num_features], dtype=dtype) if len(args) == 1: diff --git a/tensorflow/contrib/seq2seq/python/ops/helper.py b/tensorflow/contrib/seq2seq/python/ops/helper.py index 64e00c21c7..b55d90cbab 100644 --- a/tensorflow/contrib/seq2seq/python/ops/helper.py +++ b/tensorflow/contrib/seq2seq/python/ops/helper.py @@ -309,7 +309,7 @@ class ScheduledEmbeddingTrainingHelper(TrainingHelper): gen_array_ops.fill([self.batch_size], -1)) def next_inputs(self, time, outputs, state, sample_ids, name=None): - with ops.name_scope(name, "ScheduledEmbeddingTrainingHelperSample", + with ops.name_scope(name, "ScheduledEmbeddingTrainingHelperNextInputs", [time, outputs, state, sample_ids]): (finished, base_next_inputs, state) = ( super(ScheduledEmbeddingTrainingHelper, self).next_inputs( diff --git a/tensorflow/contrib/signal/BUILD b/tensorflow/contrib/signal/BUILD index 43f24474ed..2204b684ac 100644 --- a/tensorflow/contrib/signal/BUILD +++ b/tensorflow/contrib/signal/BUILD @@ -5,6 +5,7 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) load("//tensorflow:tensorflow.bzl", "cuda_py_tests") +load("//tensorflow:tensorflow.bzl", "py_test") # @unused py_library( name = "signal_py", diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py index f9449095be..094568389c 100644 --- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py +++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py @@ -135,7 +135,10 @@ class BoundingBox(ItemHandler): """ sides = [] for key in self._full_keys: - side = array_ops.expand_dims(keys_to_tensors[key].values, 0) + side = keys_to_tensors[key] + if isinstance(side, sparse_tensor.SparseTensor): + side = side.values + side = array_ops.expand_dims(side, 0) sides.append(side) bounding_box = array_ops.concat(sides, 0) diff --git 
a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py index 96606b9c0e..60d1eba07f 100644 --- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py +++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder_test.py @@ -692,7 +692,7 @@ class TFExampleDecoderTest(test.TestCase): else: self.assertAllClose(image, decoded_image, atol=0) - def testDecodeExampleWithBoundingBox(self): + def testDecodeExampleWithBoundingBoxSparse(self): num_bboxes = 10 np_ymin = np.random.rand(num_bboxes, 1) np_xmin = np.random.rand(num_bboxes, 1) @@ -731,6 +731,49 @@ class TFExampleDecoderTest(test.TestCase): self.assertAllClose(np_bboxes, bboxes) + def testDecodeExampleWithBoundingBoxDense(self): + num_bboxes = 10 + np_ymin = np.random.rand(num_bboxes, 1) + np_xmin = np.random.rand(num_bboxes, 1) + np_ymax = np.random.rand(num_bboxes, 1) + np_xmax = np.random.rand(num_bboxes, 1) + np_bboxes = np.hstack([np_ymin, np_xmin, np_ymax, np_xmax]) + + example = example_pb2.Example(features=feature_pb2.Features(feature={ + 'image/object/bbox/ymin': self._EncodedFloatFeature(np_ymin), + 'image/object/bbox/xmin': self._EncodedFloatFeature(np_xmin), + 'image/object/bbox/ymax': self._EncodedFloatFeature(np_ymax), + 'image/object/bbox/xmax': self._EncodedFloatFeature(np_xmax), + })) + serialized_example = example.SerializeToString() + + with self.test_session(): + serialized_example = array_ops.reshape(serialized_example, shape=[]) + + keys_to_features = { + 'image/object/bbox/ymin': parsing_ops.FixedLenSequenceFeature( + [], dtypes.float32, allow_missing=True), + 'image/object/bbox/xmin': parsing_ops.FixedLenSequenceFeature( + [], dtypes.float32, allow_missing=True), + 'image/object/bbox/ymax': parsing_ops.FixedLenSequenceFeature( + [], dtypes.float32, allow_missing=True), + 'image/object/bbox/xmax': parsing_ops.FixedLenSequenceFeature( + [], dtypes.float32, allow_missing=True), + } + + 
items_to_handlers = { + 'object/bbox': + tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'], + 'image/object/bbox/'), + } + + decoder = tfexample_decoder.TFExampleDecoder(keys_to_features, + items_to_handlers) + [tf_bboxes] = decoder.decode(serialized_example, ['object/bbox']) + bboxes = tf_bboxes.eval() + + self.assertAllClose(np_bboxes, bboxes) + def testDecodeExampleWithRepeatedImages(self): image_shape = (2, 3, 3) image_format = 'png' diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index 2c4bed5db1..da583a2ba0 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -42,6 +42,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":feature_keys", + ":head", ":input_pipeline", ":model_utils", "//tensorflow/python:util", @@ -78,8 +79,8 @@ py_library( deps = [ ":ar_model", ":feature_keys", + ":head", ":math_utils", - ":model_utils", ":state_management", "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:filtering_postprocessor", "//tensorflow/contrib/timeseries/python/timeseries/state_space_models:state_space_model", @@ -123,9 +124,9 @@ py_test( ) py_library( - name = "model_utils", + name = "head", srcs = [ - "model_utils.py", + "head.py", ], srcs_version = "PY2AND3", deps = [ @@ -149,9 +150,9 @@ py_library( ) py_test( - name = "model_utils_test", + name = "head_test", srcs = [ - "model_utils_test.py", + "head_test.py", ], srcs_version = "PY2AND3", tags = [ @@ -159,8 +160,8 @@ py_test( ], deps = [ ":feature_keys", + ":head", ":model", - ":model_utils", ":state_management", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -174,6 +175,41 @@ py_test( ], ) +py_library( + name = "model_utils", + srcs = [ + "model_utils.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":feature_keys", + "//tensorflow/contrib/framework:framework_py", + "//tensorflow/python:dtypes", + 
"//tensorflow/python:framework_ops", + "//tensorflow/python:init_ops", + "//tensorflow/python:nn_ops", + "//tensorflow/python:variable_scope", + "//third_party/py/numpy", + ], +) + +py_test( + name = "model_utils_test", + srcs = [ + "model_utils_test.py", + ], + srcs_version = "PY2AND3", + tags = [ + "no_pip_gpu", # b/63391119 + ], + deps = [ + ":model_utils", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:variables", + ], +) + py_library( name = "state_management", srcs = [ diff --git a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py index 267a5f88da..ff140efd48 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/ar_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/ar_model.py @@ -374,7 +374,7 @@ class ARModel(model.TimeSeriesModel): original_values = values # Extra shape checking for the window size (above that in - # model_utils.make_model_fn). + # `head.create_estimator_spec`). 
expected_times_shape = [None, self.window_size] if not times.get_shape().is_compatible_with(expected_times_shape): raise ValueError( diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators.py b/tensorflow/contrib/timeseries/python/timeseries/estimators.py index 4025a8f014..3738dfa154 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators.py @@ -20,8 +20,8 @@ from __future__ import print_function from tensorflow.contrib.timeseries.python.timeseries import ar_model from tensorflow.contrib.timeseries.python.timeseries import feature_keys +from tensorflow.contrib.timeseries.python.timeseries import head as ts_head_lib from tensorflow.contrib.timeseries.python.timeseries import math_utils -from tensorflow.contrib.timeseries.python.timeseries import model_utils from tensorflow.contrib.timeseries.python.timeseries import state_management from tensorflow.contrib.timeseries.python.timeseries.state_space_models import state_space_model from tensorflow.contrib.timeseries.python.timeseries.state_space_models import structural_ensemble @@ -59,9 +59,10 @@ class TimeSeriesRegressor(estimator_lib.Estimator): if optimizer is None: optimizer = train.AdamOptimizer(0.02) self._model = model - model_fn = model_utils.make_model_fn( + ts_regression_head = ts_head_lib.time_series_regression_head( model, state_manager, optimizer, input_statistics_generator=input_statistics_generator) + model_fn = ts_regression_head.create_estimator_spec super(TimeSeriesRegressor, self).__init__( model_fn=model_fn, model_dir=model_dir, @@ -132,7 +133,7 @@ class TimeSeriesRegressor(estimator_lib.Estimator): with ops.Graph().as_default(): self._model.initialize_graph() model_start_state = self._model.get_start_state() - for prefixed_state_name, state_tensor in model_utils.state_to_dictionary( + for prefixed_state_name, state_tensor in ts_head_lib.state_to_dictionary( model_start_state).items(): 
state_shape_with_batch = tensor_shape.TensorShape( (default_batch_size,)).concatenate(state_tensor.get_shape()) diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py new file mode 100644 index 0000000000..5896fc2a20 --- /dev/null +++ b/tensorflow/contrib/timeseries/python/timeseries/head.py @@ -0,0 +1,375 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Timeseries head.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re + +from tensorflow.contrib.framework.python.ops import variables +from tensorflow.contrib.layers.python.layers import optimizers + +from tensorflow.contrib.timeseries.python.timeseries import feature_keys + +from tensorflow.python.estimator import estimator_lib +from tensorflow.python.estimator.canned import head as head_lib +from tensorflow.python.estimator.export import export_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.util import nest + + +def 
time_series_regression_head(model, + state_manager, + optimizer, + input_statistics_generator=None): + """Creates a `_Head` for time series regression. + + Args: + model: A model for time series regression. + state_manager: A state manager. + optimizer: An optimizer. + input_statistics_generator: A input statistics generator. + + Returns: + An instance of `_Head` for time series regression. + """ + return _TimeSeriesRegressionHead(model, state_manager, optimizer, + input_statistics_generator) + + +class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-access + """See `time_series_regression_head`.""" + + def __init__(self, + model, + state_manager, + optimizer, + input_statistics_generator=None, + name=None): + self.model = model + self.state_manager = state_manager + self.optimizer = optimizer + self.input_statistics_generator = input_statistics_generator + self._name = name + + def _train_ops(self, features): + """Add training ops to the graph.""" + with variable_scope.variable_scope("model"): + model_outputs = self.state_manager.define_loss( + self.model, features, estimator_lib.ModeKeys.TRAIN) + + train_op = optimizers.optimize_loss( + model_outputs.loss, + global_step=variables.get_global_step(), + optimizer=self.optimizer, + # Learning rate is set in the Optimizer object + learning_rate=None) + return estimator_lib.EstimatorSpec( + loss=model_outputs.loss, + mode=estimator_lib.ModeKeys.TRAIN, + train_op=train_op) + + # TODO(terrytangyuan): suffix summary and metrics keys by `"/" + name` + @property + def name(self): + return self._name + + # TODO(terrytangyuan): unused for now. Need to decouple + # `state_manager.define_loss` to satisfy the extendable return signature of + # `_Head.create_loss`. 
+ def create_loss(self, features, mode, logits, labels): + """See `_Head`.""" + return None + + # TODO(terrytangyuan): check label dimension + @property + def logits_dimension(self): + return None + + def _evaluate_ops(self, features): + """Add ops for evaluation (aka filtering) to the graph.""" + with variable_scope.variable_scope("model"): + model_outputs = self.state_manager.define_loss( + self.model, features, estimator_lib.ModeKeys.EVAL) + metrics = {} + # Just output in-sample predictions for the last chunk seen + for prediction_key, prediction_value in model_outputs.predictions.items(): + metrics[prediction_key] = _identity_metric_single(prediction_key, + prediction_value) + metrics[feature_keys.FilteringResults.TIMES] = _identity_metric_single( + feature_keys.FilteringResults.TIMES, model_outputs.prediction_times) + metrics[feature_keys.FilteringResults.STATE_TUPLE] = ( + _identity_metric_nested(feature_keys.FilteringResults.STATE_TUPLE, + model_outputs.end_state)) + return estimator_lib.EstimatorSpec( + loss=model_outputs.loss, + mode=estimator_lib.ModeKeys.EVAL, + eval_metric_ops=metrics, + predictions={}) + + def _predict_ops(self, features): + """Add ops for prediction to the graph.""" + with variable_scope.variable_scope("model"): + prediction = self.model.predict(features=features) + prediction[feature_keys.PredictionResults.TIMES] = features[ + feature_keys.PredictionFeatures.TIMES] + return estimator_lib.EstimatorSpec( + predictions=prediction, mode=estimator_lib.ModeKeys.PREDICT) + + def _serving_ops(self, features): + """Add ops for serving to the graph.""" + with variable_scope.variable_scope("model"): + prediction_outputs = self.model.predict(features=features) + with variable_scope.variable_scope("model", reuse=True): + filtering_outputs = self.state_manager.define_loss( + self.model, features, estimator_lib.ModeKeys.EVAL) + + return estimator_lib.EstimatorSpec( + mode=estimator_lib.ModeKeys.PREDICT, + export_outputs={ + 
feature_keys.SavedModelLabels.PREDICT: + export_lib.PredictOutput(prediction_outputs), + feature_keys.SavedModelLabels.FILTER: + export_lib.PredictOutput( + state_to_dictionary(filtering_outputs.end_state)) + }, + # Likely unused, but it is necessary to return `predictions` to satisfy + # the Estimator's error checking. + predictions={}) + + def _convert_feature_to_tensor(self, name, value): + """Casts features to the correct dtype based on their name.""" + if name in [ + feature_keys.TrainEvalFeatures.TIMES, + feature_keys.PredictionFeatures.TIMES + ]: + return math_ops.cast(value, dtypes.int64) + if name == feature_keys.TrainEvalFeatures.VALUES: + return math_ops.cast(value, self.model.dtype) + if name == feature_keys.PredictionFeatures.STATE_TUPLE: + return value # Correct dtypes are model-dependent + return ops.convert_to_tensor(value) + + def _gather_state(self, features): + """Returns `features` with state packed, indicates if packing was done.""" + prefixed_state_re = re.compile(r"^" + feature_keys.State.STATE_PREFIX + + r"_(\d+)$") + numbered_state = [] + for key, tensor in features.items(): + search_result = prefixed_state_re.search(key) + if search_result: + numbered_state.append((int(search_result.group(1)), key, tensor)) + if not numbered_state: + return features, False + features = features.copy() + for _, key, _ in numbered_state: + del features[key] + numbered_state.sort(key=lambda number, *_: number) + features[feature_keys.State.STATE_TUPLE] = nest.pack_sequence_as( + structure=self.model.get_start_state(), + flat_sequence=[tensor for _, _, tensor in numbered_state]) + return features, True + + def create_estimator_spec(self, features, mode, labels=None): + """Performs basic error checking and returns an EstimatorSpec.""" + with ops.name_scope("head"): + if labels: + raise ValueError( + "The model received a `labels` dictionary, which is " + "not supported. 
Pass '{}' and '{}' as " + "features.".format(feature_keys.TrainEvalFeatures.TIMES, + feature_keys.TrainEvalFeatures.VALUES)) + del labels + features = { + name: self._convert_feature_to_tensor(name=name, value=value) + for name, value in features.items() + } + if self.input_statistics_generator is not None: + input_statistics = self.input_statistics_generator.initialize_graph( + features, update_statistics=(mode == estimator_lib.ModeKeys.TRAIN)) + else: + input_statistics = None + self.model.initialize_graph(input_statistics=input_statistics) + + # _gather_state requires the model to have its graph initialized (so it + # has access to the structure of the model's state) + features, passed_flat_state = self._gather_state(features) + if (mode == estimator_lib.ModeKeys.TRAIN or + mode == estimator_lib.ModeKeys.EVAL): + _check_train_eval_features(features, self.model) + elif mode == estimator_lib.ModeKeys.PREDICT: + _check_predict_features(features) + else: + raise ValueError("Unknown mode '{}' passed to model_fn.".format(mode)) + + self.state_manager.initialize_graph( + model=self.model, input_statistics=input_statistics) + + if mode == estimator_lib.ModeKeys.TRAIN: + return self._train_ops(features) + elif mode == estimator_lib.ModeKeys.EVAL: + return self._evaluate_ops(features) + elif mode == estimator_lib.ModeKeys.PREDICT and not passed_flat_state: + return self._predict_ops(features) + elif mode == estimator_lib.ModeKeys.PREDICT and passed_flat_state: + # The mode is PREDICT, but we're actually in export_savedmodel for + # serving. We want to return two graphs: one for filtering (state + data + # -> state) and one for predicting (state -> prediction). 
+ return self._serving_ops(features) + + +def _check_feature_shapes_compatible_with(features, + compatible_with_name, + compatible_with_value, + ignore=None): + """Checks all features are compatible with the given time-like feature.""" + if ignore is None: + ignore = set() + for name, value in features.items(): + if name in ignore: + continue + feature_shape = value.get_shape() + if feature_shape.ndims is None: + continue + if feature_shape.ndims < 2: + raise ValueError( + ("Features must have shape (batch dimension, window size, ...) " + "(got rank {} for feature '{}')").format(feature_shape.ndims, name)) + if not feature_shape[:2].is_compatible_with( + compatible_with_value.get_shape()): + raise ValueError( + ("Features must have shape (batch dimension, window size, ...) " + "where batch dimension and window size match the " + "'{times_feature}' feature (got shape {feature_shape} for " + "feature '{feature_name}' but shape {times_shape} for feature " + "'{times_feature}')").format( + times_feature=compatible_with_name, + feature_shape=feature_shape, + feature_name=name, + times_shape=compatible_with_value.get_shape())) + + +def _check_predict_features(features): + """Raises errors if features are not suitable for prediction.""" + if feature_keys.PredictionFeatures.TIMES not in features: + raise ValueError("Expected a '{}' feature for prediction.".format( + feature_keys.PredictionFeatures.TIMES)) + if feature_keys.PredictionFeatures.STATE_TUPLE not in features: + raise ValueError("Expected a '{}' feature for prediction.".format( + feature_keys.PredictionFeatures.STATE_TUPLE)) + times_feature = features[feature_keys.PredictionFeatures.TIMES] + if not times_feature.get_shape().is_compatible_with([None, None]): + raise ValueError( + ("Expected shape (batch dimension, window size) for feature '{}' " + "(got shape {})").format(feature_keys.PredictionFeatures.TIMES, + times_feature.get_shape())) + _check_feature_shapes_compatible_with( + features=features, + 
compatible_with_name=feature_keys.PredictionFeatures.TIMES, + compatible_with_value=times_feature, + ignore=set([ + feature_keys.PredictionFeatures.STATE_TUPLE # Model-dependent shapes + ])) + + +def _check_train_eval_features(features, model): + """Raise errors if features are not suitable for training/evaluation.""" + if feature_keys.TrainEvalFeatures.TIMES not in features: + raise ValueError("Expected a '{}' feature for training/evaluation.".format( + feature_keys.TrainEvalFeatures.TIMES)) + if feature_keys.TrainEvalFeatures.VALUES not in features: + raise ValueError("Expected a '{}' feature for training/evaluation.".format( + feature_keys.TrainEvalFeatures.VALUES)) + times_feature = features[feature_keys.TrainEvalFeatures.TIMES] + if not times_feature.get_shape().is_compatible_with([None, None]): + raise ValueError( + ("Expected shape (batch dimension, window size) for feature '{}' " + "(got shape {})").format(feature_keys.TrainEvalFeatures.TIMES, + times_feature.get_shape())) + values_feature = features[feature_keys.TrainEvalFeatures.VALUES] + if not values_feature.get_shape().is_compatible_with( + [None, None, model.num_features]): + raise ValueError( + ("Expected shape (batch dimension, window size, {num_features}) " + "for feature '{feature_name}', since the model was configured " + "with num_features={num_features} (got shape {got_shape})").format( + num_features=model.num_features, + feature_name=feature_keys.TrainEvalFeatures.VALUES, + got_shape=times_feature.get_shape())) + _check_feature_shapes_compatible_with( + features=features, + compatible_with_name=feature_keys.TrainEvalFeatures.TIMES, + compatible_with_value=times_feature, + ignore=set([ + feature_keys.State.STATE_TUPLE # Model-dependent shapes + ])) + + +def _identity_metric_single(name, input_tensor): + """A metric which takes on its last updated value. + + This keeps evaluation metrics in sync with one another, since update ops are + run separately from their result Tensors. 
Simply returning (input_tensor, + no_op) as a metric with a value but no update means that a metric will come + from a different batch of data than metrics which cache values in a Variable + (e.g. the default loss metric). + + Args: + name: A name for the metric. + input_tensor: Any Tensor. + Returns: + A tuple of (value, update_op). + """ + metric_variable = variable_scope.variable( + name="{}_identity_metric".format(name), + initial_value=array_ops.zeros([], dtype=input_tensor.dtype), + collections=[ops.GraphKeys.LOCAL_VARIABLES], + validate_shape=False) + update_op = state_ops.assign( + metric_variable, input_tensor, validate_shape=False) + # This shape will be correct once the first update runs (but may be + # incomplete, so is not helpful for initializing the variable). + metric_variable.set_shape(input_tensor.get_shape()) + return (metric_variable.value(), update_op) + + +def _identity_metric_nested(name, input_tensors): + """Create identity metrics for a nested tuple of Tensors.""" + update_ops = [] + value_tensors = [] + for tensor_number, tensor in enumerate(nest.flatten(input_tensors)): + value_tensor, update_op = _identity_metric_single( + name="{}_{}".format(name, tensor_number), input_tensor=tensor) + update_ops.append(update_op) + value_tensors.append(value_tensor) + return (nest.pack_sequence_as(input_tensors, value_tensors), + control_flow_ops.group(*update_ops)) + + +def state_to_dictionary(state_tuple): + """Flatten model state into a dictionary with string keys.""" + flattened = {} + for state_number, state_value in enumerate(nest.flatten(state_tuple)): + prefixed_state_name = "{}_{:02d}".format(feature_keys.State.STATE_PREFIX, + state_number) + flattened[prefixed_state_name] = state_value + return flattened diff --git a/tensorflow/contrib/timeseries/python/timeseries/head_test.py b/tensorflow/contrib/timeseries/python/timeseries/head_test.py new file mode 100644 index 0000000000..3415061cfd --- /dev/null +++ 
b/tensorflow/contrib/timeseries/python/timeseries/head_test.py @@ -0,0 +1,267 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for head.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.timeseries.python.timeseries import feature_keys +from tensorflow.contrib.timeseries.python.timeseries import head as ts_head_lib +from tensorflow.contrib.timeseries.python.timeseries import model +from tensorflow.contrib.timeseries.python.timeseries import state_management + +from tensorflow.python.estimator import estimator_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import coordinator as coordinator_lib +from tensorflow.python.training import queue_runner_impl +from tensorflow.python.training import training as train + + +class HeadTest(test.TestCase): + + def test_labels_provided_error(self): + model_fn = _stub_model_fn() + for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL, + estimator_lib.ModeKeys.PREDICT]: + 
with self.assertRaisesRegexp(ValueError, "labels"): + model_fn(features={}, labels={"a": "b"}, mode=mode) + + def test_unknown_mode(self): + model_fn = _stub_model_fn() + with self.assertRaisesRegexp(ValueError, "Unknown mode 'Not a mode'"): + model_fn(features={}, labels={}, mode="Not a mode") + + +class _TickerModel(object): + num_features = 1 + dtype = dtypes.float32 + + def initialize_graph(self, input_statistics): + pass + + def define_loss(self, features, mode): + del mode # unused + return model.ModelOutputs( + loss=features["ticker"], + end_state=(features["ticker"], features["ticker"]), + prediction_times=array_ops.zeros(()), + predictions={"ticker": features["ticker"]}) + + +class EvaluationMetricsTests(test.TestCase): + + def test_metrics_consistent(self): + # Tests that the identity metrics used to report in-sample predictions match + # the behavior of standard metrics. + g = ops.Graph() + with g.as_default(): + features = { + feature_keys.TrainEvalFeatures.TIMES: + array_ops.zeros((1, 1)), + feature_keys.TrainEvalFeatures.VALUES: + array_ops.zeros((1, 1, 1)), + "ticker": + array_ops.reshape( + math_ops.cast( + variables.Variable( + name="ticker", + initial_value=0, + dtype=dtypes.int64, + collections=[ops.GraphKeys.LOCAL_VARIABLES]) + .count_up_to(10), + dtype=dtypes.float32), (1, 1, 1)) + } + model_fn = ts_head_lib.time_series_regression_head( + model=_TickerModel(), + state_manager=state_management.PassthroughStateManager(), + optimizer=train.GradientDescentOptimizer(0.001)).create_estimator_spec + outputs = model_fn( + features=features, labels=None, mode=estimator_lib.ModeKeys.EVAL) + metric_update_ops = [ + metric[1] for metric in outputs.eval_metric_ops.values()] + loss_mean, loss_update = metrics.mean(outputs.loss) + metric_update_ops.append(loss_update) + with self.test_session() as sess: + coordinator = coordinator_lib.Coordinator() + queue_runner_impl.start_queue_runners(sess, coord=coordinator) + variables.local_variables_initializer().run() 
+ sess.run(metric_update_ops) + loss_evaled, metric_evaled, nested_metric_evaled = sess.run( + (loss_mean, outputs.eval_metric_ops["ticker"][0], + outputs.eval_metric_ops[feature_keys.FilteringResults.STATE_TUPLE][ + 0][0])) + # The custom model_utils metrics for in-sample predictions should be in + # sync with the Estimator's mean metric for model loss. + self.assertAllClose(0., loss_evaled) + self.assertAllClose((((0.,),),), metric_evaled) + self.assertAllClose((((0.,),),), nested_metric_evaled) + coordinator.request_stop() + coordinator.join() + + +class _StubModel(object): + num_features = 3 + dtype = dtypes.float64 + + def initialize_graph(self, input_statistics): + del input_statistics # unused + + +def _stub_model_fn(): + return ts_head_lib.time_series_regression_head( + model=_StubModel(), + state_manager=state_management.PassthroughStateManager(), + optimizer=train.AdamOptimizer(0.001)).create_estimator_spec + + +class TrainEvalFeatureCheckingTests(test.TestCase): + + def test_no_time_feature(self): + model_fn = _stub_model_fn() + for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: + with self.assertRaisesRegexp(ValueError, "Expected a '{}' feature".format( + feature_keys.TrainEvalFeatures.TIMES)): + model_fn( + features={feature_keys.TrainEvalFeatures.VALUES: [[[1.]]]}, + labels=None, + mode=mode) + + def test_no_value_feature(self): + model_fn = _stub_model_fn() + for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: + with self.assertRaisesRegexp(ValueError, "Expected a '{}' feature".format( + feature_keys.TrainEvalFeatures.VALUES)): + model_fn( + features={feature_keys.TrainEvalFeatures.TIMES: [[1]]}, + labels=None, + mode=mode) + + def test_bad_time_rank(self): + model_fn = _stub_model_fn() + for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: + with self.assertRaisesRegexp(ValueError, + "Expected shape.*for feature '{}'".format( + feature_keys.TrainEvalFeatures.TIMES)): + model_fn( + 
features={ + feature_keys.TrainEvalFeatures.TIMES: [[[1]]], + feature_keys.TrainEvalFeatures.VALUES: [[[1.]]] + }, + labels=None, + mode=mode) + + def test_bad_value_rank(self): + model_fn = _stub_model_fn() + for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: + with self.assertRaisesRegexp(ValueError, + "Expected shape.*for feature '{}'".format( + feature_keys.TrainEvalFeatures.VALUES)): + model_fn( + features={ + feature_keys.TrainEvalFeatures.TIMES: [[1]], + feature_keys.TrainEvalFeatures.VALUES: [[1.]] + }, + labels=None, + mode=mode) + + def test_bad_value_num_features(self): + model_fn = _stub_model_fn() + for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: + with self.assertRaisesRegexp( + ValueError, "Expected shape.*, 3.*for feature '{}'".format( + feature_keys.TrainEvalFeatures.VALUES)): + model_fn( + features={ + feature_keys.TrainEvalFeatures.TIMES: [[1]], + feature_keys.TrainEvalFeatures.VALUES: [[[1.]]] + }, + labels=None, + mode=mode) + + def test_bad_exogenous_shape(self): + model_fn = _stub_model_fn() + for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: + with self.assertRaisesRegexp( + ValueError, + "Features must have shape.*for feature 'exogenous'"): + model_fn( + features={ + feature_keys.TrainEvalFeatures.TIMES: [[1]], + feature_keys.TrainEvalFeatures.VALUES: [[[1., 2., 3.]]], + "exogenous": [[1], [2]] + }, + labels=None, + mode=mode) + + +class PredictFeatureCheckingTests(test.TestCase): + + def test_no_time_feature(self): + model_fn = _stub_model_fn() + with self.assertRaisesRegexp(ValueError, "Expected a '{}' feature".format( + feature_keys.PredictionFeatures.TIMES)): + model_fn( + features={ + feature_keys.PredictionFeatures.STATE_TUPLE: ([[[1.]]], 1.) 
+ }, + labels=None, + mode=estimator_lib.ModeKeys.PREDICT) + + def test_no_start_state_feature(self): + model_fn = _stub_model_fn() + with self.assertRaisesRegexp(ValueError, "Expected a '{}' feature".format( + feature_keys.PredictionFeatures.STATE_TUPLE)): + model_fn( + features={feature_keys.PredictionFeatures.TIMES: [[1]]}, + labels=None, + mode=estimator_lib.ModeKeys.PREDICT) + + def test_bad_time_rank(self): + model_fn = _stub_model_fn() + with self.assertRaisesRegexp(ValueError, + "Expected shape.*for feature '{}'".format( + feature_keys.PredictionFeatures.TIMES)): + model_fn( + features={ + feature_keys.PredictionFeatures.TIMES: 1, + feature_keys.PredictionFeatures.STATE_TUPLE: (1, (2, 3.)) + }, + labels=None, + mode=estimator_lib.ModeKeys.PREDICT) + + def test_bad_exogenous_shape(self): + model_fn = _stub_model_fn() + with self.assertRaisesRegexp( + ValueError, + "Features must have shape.*for feature 'exogenous'"): + model_fn( + features={ + feature_keys.PredictionFeatures.TIMES: [[1]], + feature_keys.PredictionFeatures.STATE_TUPLE: (1, (2, 3.)), + "exogenous": 1. 
+ }, + labels=None, + mode=estimator_lib.ModeKeys.PREDICT) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/timeseries/python/timeseries/model_utils.py b/tensorflow/contrib/timeseries/python/timeseries/model_utils.py index addcdb0575..b5d7cb376b 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/model_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/model_utils.py @@ -18,334 +18,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import re - import numpy -from tensorflow.contrib.framework.python.ops import variables -from tensorflow.contrib.layers.python.layers import optimizers - from tensorflow.contrib.timeseries.python.timeseries import feature_keys -from tensorflow.python.estimator import estimator_lib -from tensorflow.python.estimator.export import export_lib from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope -from tensorflow.python.util import nest - - -def _check_feature_shapes_compatible_with( - features, compatible_with_name, compatible_with_value, ignore=None): - """Checks all features are compatible with the given time-like feature.""" - if ignore is None: - ignore = set() - for name, value in features.items(): - if name in ignore: - continue - feature_shape = value.get_shape() - if feature_shape.ndims is None: - continue - if feature_shape.ndims < 2: - raise ValueError( - ("Features must have shape (batch dimension, window size, ...) 
" - "(got rank {} for feature '{}')").format( - feature_shape.ndims, name)) - if not feature_shape[:2].is_compatible_with( - compatible_with_value.get_shape()): - raise ValueError( - ("Features must have shape (batch dimension, window size, ...) " - "where batch dimension and window size match the " - "'{times_feature}' feature (got shape {feature_shape} for " - "feature '{feature_name}' but shape {times_shape} for feature " - "'{times_feature}')").format( - times_feature=compatible_with_name, - feature_shape=feature_shape, - feature_name=name, - times_shape=compatible_with_value.get_shape())) - - -def _check_predict_features(features): - """Raises errors if features are not suitable for prediction.""" - if feature_keys.PredictionFeatures.TIMES not in features: - raise ValueError("Expected a '{}' feature for prediction.".format( - feature_keys.PredictionFeatures.TIMES)) - if feature_keys.PredictionFeatures.STATE_TUPLE not in features: - raise ValueError("Expected a '{}' feature for prediction.".format( - feature_keys.PredictionFeatures.STATE_TUPLE)) - times_feature = features[feature_keys.PredictionFeatures.TIMES] - if not times_feature.get_shape().is_compatible_with([None, None]): - raise ValueError( - ("Expected shape (batch dimension, window size) for feature '{}' " - "(got shape {})").format(feature_keys.PredictionFeatures.TIMES, - times_feature.get_shape())) - _check_feature_shapes_compatible_with( - features=features, - compatible_with_name=feature_keys.PredictionFeatures.TIMES, - compatible_with_value=times_feature, - ignore=set([ - feature_keys.PredictionFeatures.STATE_TUPLE # Model-dependent shapes - ])) - - -def _check_train_eval_features(features, model): - """Raise errors if features are not suitable for training/evaluation.""" - if feature_keys.TrainEvalFeatures.TIMES not in features: - raise ValueError("Expected a '{}' feature for training/evaluation.".format( - feature_keys.TrainEvalFeatures.TIMES)) - if feature_keys.TrainEvalFeatures.VALUES not in 
features: - raise ValueError("Expected a '{}' feature for training/evaluation.".format( - feature_keys.TrainEvalFeatures.VALUES)) - times_feature = features[feature_keys.TrainEvalFeatures.TIMES] - if not times_feature.get_shape().is_compatible_with([None, None]): - raise ValueError( - ("Expected shape (batch dimension, window size) for feature '{}' " - "(got shape {})").format(feature_keys.TrainEvalFeatures.TIMES, - times_feature.get_shape())) - values_feature = features[feature_keys.TrainEvalFeatures.VALUES] - if not values_feature.get_shape().is_compatible_with( - [None, None, model.num_features]): - raise ValueError( - ("Expected shape (batch dimension, window size, {num_features}) " - "for feature '{feature_name}', since the model was configured " - "with num_features={num_features} (got shape {got_shape})").format( - num_features=model.num_features, - feature_name=feature_keys.TrainEvalFeatures.VALUES, - got_shape=times_feature.get_shape())) - _check_feature_shapes_compatible_with( - features=features, - compatible_with_name=feature_keys.TrainEvalFeatures.TIMES, - compatible_with_value=times_feature, - ignore=set([ - feature_keys.State.STATE_TUPLE # Model-dependent shapes - ])) - - -def _identity_metric_single(name, input_tensor): - """A metric which takes on its last updated value. - - This keeps evaluation metrics in sync with one another, since update ops are - run separately from their result Tensors. Simply returning (input_tensor, - no_op) as a metric with a value but no update means that a metric will come - from a different batch of data than metrics which cache values in a Variable - (e.g. the default loss metric). - - Args: - name: A name for the metric. - input_tensor: Any Tensor. - Returns: - A tuple of (value, update_op). 
- """ - metric_variable = variable_scope.variable( - name="{}_identity_metric".format(name), - initial_value=array_ops.zeros([], dtype=input_tensor.dtype), - collections=[ops.GraphKeys.LOCAL_VARIABLES], - validate_shape=False) - update_op = state_ops.assign(metric_variable, input_tensor, - validate_shape=False) - # This shape will be correct once the first update runs (but may be - # incomplete, so is not helpful for initializing the variable). - metric_variable.set_shape(input_tensor.get_shape()) - return (metric_variable.value(), update_op) - - -def _identity_metric_nested(name, input_tensors): - """Create identity metrics for a nested tuple of Tensors.""" - update_ops = [] - value_tensors = [] - for tensor_number, tensor in enumerate(nest.flatten(input_tensors)): - value_tensor, update_op = _identity_metric_single( - name="{}_{}".format(name, tensor_number), - input_tensor=tensor) - update_ops.append(update_op) - value_tensors.append(value_tensor) - return (nest.pack_sequence_as(input_tensors, value_tensors), - control_flow_ops.group(*update_ops)) - - -def state_to_dictionary(state_tuple): - """Flatten model state into a dictionary with string keys.""" - flattened = {} - for state_number, state_value in enumerate(nest.flatten(state_tuple)): - prefixed_state_name = "{}_{:02d}".format(feature_keys.State.STATE_PREFIX, - state_number) - flattened[prefixed_state_name] = state_value - return flattened - - -def make_model_fn( - model, state_manager, optimizer, input_statistics_generator=None): - """Returns a model function suitable for use with a tf.estimator. - - Args: - model: The object (inheriting from Model) to create a function for. - state_manager: A state manager to wrap the model with (or - PassthroughStateManager if no state needs to be managed). - optimizer: An instance of `tf.train.Optimizer` to use for training. 
- input_statistics_generator: An InputStatisticsFromMiniBatch object from - math_utils.py, used for collecting statistics about input data during - training. - Returns: - The model function, suitable for passing to a tf.estimator.Estimator. - """ - - def _convert_feature_to_tensor(name, value): - """Casts features to the correct dtype based on their name.""" - if name in [ - feature_keys.TrainEvalFeatures.TIMES, - feature_keys.PredictionFeatures.TIMES - ]: - return math_ops.cast(value, dtypes.int64) - if name == feature_keys.TrainEvalFeatures.VALUES: - return math_ops.cast(value, model.dtype) - if name == feature_keys.PredictionFeatures.STATE_TUPLE: - return value # Correct dtypes are model-dependent - return ops.convert_to_tensor(value) - - def _gather_state(features): - """Returns `features` with state packed, indicates if packing was done.""" - prefixed_state_re = re.compile(r"^" + feature_keys.State.STATE_PREFIX + - r"_(\d+)$") - numbered_state = [] - for key, tensor in features.items(): - search_result = prefixed_state_re.search(key) - if search_result: - numbered_state.append((int(search_result.group(1)), key, tensor)) - if not numbered_state: - return features, False - features = features.copy() - for _, key, _ in numbered_state: - del features[key] - numbered_state.sort(key=lambda number, *_: number) - features[feature_keys.State.STATE_TUPLE] = nest.pack_sequence_as( - structure=model.get_start_state(), - flat_sequence=[tensor for _, _, tensor in numbered_state]) - return features, True - - def _train(features): - """Add training ops to the graph.""" - with variable_scope.variable_scope("model"): - model_outputs = state_manager.define_loss(model, features, - estimator_lib.ModeKeys.TRAIN) - train_op = optimizers.optimize_loss( - model_outputs.loss, - global_step=variables.get_global_step(), - optimizer=optimizer, - # Learning rate is set in the Optimizer object - learning_rate=None) - return estimator_lib.EstimatorSpec( - loss=model_outputs.loss, - 
mode=estimator_lib.ModeKeys.TRAIN, - train_op=train_op) - - def _evaluate(features): - """Add ops for evaluation (aka filtering) to the graph.""" - with variable_scope.variable_scope("model"): - model_outputs = state_manager.define_loss(model, features, - estimator_lib.ModeKeys.EVAL) - metrics = {} - # Just output in-sample predictions for the last chunk seen - for prediction_key, prediction_value in model_outputs.predictions.items(): - metrics[prediction_key] = _identity_metric_single(prediction_key, - prediction_value) - metrics[feature_keys.FilteringResults.TIMES] = _identity_metric_single( - feature_keys.FilteringResults.TIMES, model_outputs.prediction_times) - metrics[feature_keys.FilteringResults.STATE_TUPLE] = ( - _identity_metric_nested(feature_keys.FilteringResults.STATE_TUPLE, - model_outputs.end_state)) - return estimator_lib.EstimatorSpec( - loss=model_outputs.loss, - mode=estimator_lib.ModeKeys.EVAL, - eval_metric_ops=metrics, - predictions={}) - - def _predict(features): - """Add ops for prediction to the graph.""" - with variable_scope.variable_scope("model"): - prediction = model.predict(features=features) - prediction[feature_keys.PredictionResults.TIMES] = features[ - feature_keys.PredictionFeatures.TIMES] - return estimator_lib.EstimatorSpec( - predictions=prediction, mode=estimator_lib.ModeKeys.PREDICT) - - def _serving(features): - with variable_scope.variable_scope("model"): - prediction_outputs = model.predict(features=features) - with variable_scope.variable_scope("model", reuse=True): - filtering_outputs = state_manager.define_loss(model, features, - estimator_lib.ModeKeys.EVAL) - return estimator_lib.EstimatorSpec( - mode=estimator_lib.ModeKeys.PREDICT, - export_outputs={ - feature_keys.SavedModelLabels.PREDICT: - export_lib.PredictOutput(prediction_outputs), - feature_keys.SavedModelLabels.FILTER: - export_lib.PredictOutput( - state_to_dictionary(filtering_outputs.end_state)) - }, - # Likely unused, but it is necessary to return 
`predictions` to satisfy - # the Estimator's error checking. - predictions={}) - - def _model_fn(features, labels, mode): - """Given a time series in `features`, define a loss for `mode`. - - Args: - features: A dictionary, the output of a chunker (typically with keys - feature_keys.TrainEvalFeatures.TIMES and - feature_keys.TrainEvalFeatures.VALUES). - labels: Not used; included for compatibility with tf.learn. - mode: The tf.estimator.ModeKeys mode to use (TRAIN, EVAL, INFER). - Returns: - A tuple of predictions, a loss Tensor, and a train op. - Raises: - ValueError: If the model makes predictions which do not have static shape - information. - """ - if labels: - raise ValueError("The model received a `labels` dictionary, which is not" - " supported. Pass '{}' and '{}' as features.".format( - feature_keys.TrainEvalFeatures.TIMES, - feature_keys.TrainEvalFeatures.VALUES)) - del labels - features = {name: _convert_feature_to_tensor(name=name, value=value) - for name, value in features.items()} - if input_statistics_generator is not None: - input_statistics = input_statistics_generator.initialize_graph( - features, update_statistics=(mode == estimator_lib.ModeKeys.TRAIN)) - else: - input_statistics = None - model.initialize_graph(input_statistics=input_statistics) - # _gather_state requires the model to have its graph initialized (so it has - # access to the structure of the model's state) - features, passed_flat_state = _gather_state(features) - if (mode == estimator_lib.ModeKeys.TRAIN - or mode == estimator_lib.ModeKeys.EVAL): - _check_train_eval_features(features, model) - elif mode == estimator_lib.ModeKeys.PREDICT: - _check_predict_features(features) - else: - raise ValueError("Unknown mode '{}' passed to model_fn.".format(mode)) - state_manager.initialize_graph( - model=model, input_statistics=input_statistics) - if mode == estimator_lib.ModeKeys.TRAIN: - return _train(features) - elif mode == estimator_lib.ModeKeys.EVAL: - return _evaluate(features) - elif 
mode == estimator_lib.ModeKeys.PREDICT and not passed_flat_state: - return _predict(features) - elif mode == estimator_lib.ModeKeys.PREDICT and passed_flat_state: - # The mode is PREDICT, but we're actually in export_savedmodel for - # serving. We want to return two graphs: one for filtering (state + data - # -> state) and one for predicting (state -> prediction). - return _serving(features) - return _model_fn # TODO(agarwal): Remove and replace with functionality from tf.slim diff --git a/tensorflow/contrib/timeseries/python/timeseries/model_utils_test.py b/tensorflow/contrib/timeseries/python/timeseries/model_utils_test.py index 2998689554..cfd31cc70d 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/model_utils_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/model_utils_test.py @@ -18,22 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.timeseries.python.timeseries import feature_keys -from tensorflow.contrib.timeseries.python.timeseries import model from tensorflow.contrib.timeseries.python.timeseries import model_utils -from tensorflow.contrib.timeseries.python.timeseries import state_management -from tensorflow.python.estimator import estimator_lib -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import metrics -from tensorflow.python.ops import variables from tensorflow.python.platform import test -from tensorflow.python.training import coordinator as coordinator_lib -from tensorflow.python.training import queue_runner_impl -from tensorflow.python.training import training as train class ModelUtilsTest(test.TestCase): @@ -46,230 +34,6 @@ class ModelUtilsTest(test.TestCase): self.assertEqual(5, getter(parameter)) self.assertEqual(4, getter(overridden_parameter)) - def 
test_labels_provided_error(self): - model_fn = _stub_model_fn() - for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL, - estimator_lib.ModeKeys.PREDICT]: - with self.assertRaisesRegexp(ValueError, "labels"): - model_fn(features={}, labels={"a": "b"}, mode=mode) - - def test_unknown_mode(self): - model_fn = _stub_model_fn() - with self.assertRaisesRegexp(ValueError, "Unknown mode 'Not a mode'"): - model_fn(features={}, labels={}, mode="Not a mode") - - -class _TickerModel(object): - num_features = 1 - dtype = dtypes.float32 - - def initialize_graph(self, input_statistics): - pass - - def define_loss(self, features, mode): - del mode # unused - return model.ModelOutputs( - loss=features["ticker"], - end_state=(features["ticker"], features["ticker"]), - prediction_times=array_ops.zeros(()), - predictions={"ticker": features["ticker"]}) - - -class EvaluationMetricsTests(test.TestCase): - - def test_metrics_consistent(self): - # Tests that the identity metrics used to report in-sample predictions match - # the behavior of standard metrics. 
- g = ops.Graph() - with g.as_default(): - features = { - feature_keys.TrainEvalFeatures.TIMES: - array_ops.zeros((1, 1)), - feature_keys.TrainEvalFeatures.VALUES: - array_ops.zeros((1, 1, 1)), - "ticker": - array_ops.reshape( - math_ops.cast( - variables.Variable( - name="ticker", - initial_value=0, - dtype=dtypes.int64, - collections=[ops.GraphKeys.LOCAL_VARIABLES]) - .count_up_to(10), - dtype=dtypes.float32), (1, 1, 1)) - } - model_fn = model_utils.make_model_fn( - model=_TickerModel(), - state_manager=state_management.PassthroughStateManager(), - optimizer=train.GradientDescentOptimizer(0.001)) - outputs = model_fn( - features=features, labels=None, mode=estimator_lib.ModeKeys.EVAL) - metric_update_ops = [ - metric[1] for metric in outputs.eval_metric_ops.values()] - loss_mean, loss_update = metrics.mean(outputs.loss) - metric_update_ops.append(loss_update) - with self.test_session() as sess: - coordinator = coordinator_lib.Coordinator() - queue_runner_impl.start_queue_runners(sess, coord=coordinator) - variables.local_variables_initializer().run() - sess.run(metric_update_ops) - loss_evaled, metric_evaled, nested_metric_evaled = sess.run( - (loss_mean, outputs.eval_metric_ops["ticker"][0], - outputs.eval_metric_ops[feature_keys.FilteringResults.STATE_TUPLE][ - 0][0])) - # The custom model_utils metrics for in-sample predictions should be in - # sync with the Estimator's mean metric for model loss. 
- self.assertAllClose(0., loss_evaled) - self.assertAllClose((((0.,),),), metric_evaled) - self.assertAllClose((((0.,),),), nested_metric_evaled) - coordinator.request_stop() - coordinator.join() - - -class _StubModel(object): - num_features = 3 - dtype = dtypes.float64 - - def initialize_graph(self, input_statistics): - del input_statistics # unused - - -def _stub_model_fn(): - return model_utils.make_model_fn( - model=_StubModel(), - state_manager=state_management.PassthroughStateManager(), - optimizer=train.AdamOptimizer(0.001)) - - -class TrainEvalFeatureCheckingTests(test.TestCase): - - def test_no_time_feature(self): - model_fn = _stub_model_fn() - for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: - with self.assertRaisesRegexp(ValueError, "Expected a '{}' feature".format( - feature_keys.TrainEvalFeatures.TIMES)): - model_fn( - features={feature_keys.TrainEvalFeatures.VALUES: [[[1.]]]}, - labels=None, - mode=mode) - - def test_no_value_feature(self): - model_fn = _stub_model_fn() - for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: - with self.assertRaisesRegexp(ValueError, "Expected a '{}' feature".format( - feature_keys.TrainEvalFeatures.VALUES)): - model_fn( - features={feature_keys.TrainEvalFeatures.TIMES: [[1]]}, - labels=None, - mode=mode) - - def test_bad_time_rank(self): - model_fn = _stub_model_fn() - for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: - with self.assertRaisesRegexp(ValueError, - "Expected shape.*for feature '{}'".format( - feature_keys.TrainEvalFeatures.TIMES)): - model_fn( - features={ - feature_keys.TrainEvalFeatures.TIMES: [[[1]]], - feature_keys.TrainEvalFeatures.VALUES: [[[1.]]] - }, - labels=None, - mode=mode) - - def test_bad_value_rank(self): - model_fn = _stub_model_fn() - for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: - with self.assertRaisesRegexp(ValueError, - "Expected shape.*for feature '{}'".format( - 
feature_keys.TrainEvalFeatures.VALUES)): - model_fn( - features={ - feature_keys.TrainEvalFeatures.TIMES: [[1]], - feature_keys.TrainEvalFeatures.VALUES: [[1.]] - }, - labels=None, - mode=mode) - - def test_bad_value_num_features(self): - model_fn = _stub_model_fn() - for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: - with self.assertRaisesRegexp( - ValueError, "Expected shape.*, 3.*for feature '{}'".format( - feature_keys.TrainEvalFeatures.VALUES)): - model_fn( - features={ - feature_keys.TrainEvalFeatures.TIMES: [[1]], - feature_keys.TrainEvalFeatures.VALUES: [[[1.]]] - }, - labels=None, - mode=mode) - - def test_bad_exogenous_shape(self): - model_fn = _stub_model_fn() - for mode in [estimator_lib.ModeKeys.TRAIN, estimator_lib.ModeKeys.EVAL]: - with self.assertRaisesRegexp( - ValueError, - "Features must have shape.*for feature 'exogenous'"): - model_fn( - features={ - feature_keys.TrainEvalFeatures.TIMES: [[1]], - feature_keys.TrainEvalFeatures.VALUES: [[[1., 2., 3.]]], - "exogenous": [[1], [2]] - }, - labels=None, - mode=mode) - - -class PredictFeatureCheckingTests(test.TestCase): - - def test_no_time_feature(self): - model_fn = _stub_model_fn() - with self.assertRaisesRegexp(ValueError, "Expected a '{}' feature".format( - feature_keys.PredictionFeatures.TIMES)): - model_fn( - features={ - feature_keys.PredictionFeatures.STATE_TUPLE: ([[[1.]]], 1.) 
- }, - labels=None, - mode=estimator_lib.ModeKeys.PREDICT) - - def test_no_start_state_feature(self): - model_fn = _stub_model_fn() - with self.assertRaisesRegexp(ValueError, "Expected a '{}' feature".format( - feature_keys.PredictionFeatures.STATE_TUPLE)): - model_fn( - features={feature_keys.PredictionFeatures.TIMES: [[1]]}, - labels=None, - mode=estimator_lib.ModeKeys.PREDICT) - - def test_bad_time_rank(self): - model_fn = _stub_model_fn() - with self.assertRaisesRegexp(ValueError, - "Expected shape.*for feature '{}'".format( - feature_keys.PredictionFeatures.TIMES)): - model_fn( - features={ - feature_keys.PredictionFeatures.TIMES: 1, - feature_keys.PredictionFeatures.STATE_TUPLE: (1, (2, 3.)) - }, - labels=None, - mode=estimator_lib.ModeKeys.PREDICT) - - def test_bad_exogenous_shape(self): - model_fn = _stub_model_fn() - with self.assertRaisesRegexp( - ValueError, - "Features must have shape.*for feature 'exogenous'"): - model_fn( - features={ - feature_keys.PredictionFeatures.TIMES: [[1]], - feature_keys.PredictionFeatures.STATE_TUPLE: (1, (2, 3.)), - "exogenous": 1. 
- }, - labels=None, - mode=estimator_lib.ModeKeys.PREDICT) - if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/timeseries/python/timeseries/saved_model_utils.py b/tensorflow/contrib/timeseries/python/timeseries/saved_model_utils.py index 16e29f5e68..97f6d36a87 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/saved_model_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/saved_model_utils.py @@ -23,6 +23,7 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.timeseries.python.timeseries import feature_keys as _feature_keys +from tensorflow.contrib.timeseries.python.timeseries import head as _head from tensorflow.contrib.timeseries.python.timeseries import input_pipeline as _input_pipeline from tensorflow.contrib.timeseries.python.timeseries import model_utils as _model_utils @@ -34,7 +35,7 @@ def _colate_features_to_feeds_and_fetches(continue_from, signature, features, """Uses a saved model signature to construct feed and fetch dictionaries.""" if _feature_keys.FilteringResults.STATE_TUPLE in continue_from: # We're continuing from an evaluation, so we need to unpack/flatten state. 
- state_values = _model_utils.state_to_dictionary( + state_values = _head.state_to_dictionary( continue_from[_feature_keys.FilteringResults.STATE_TUPLE]) else: state_values = continue_from diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index eb66d8e329..f3e43dd552 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1773,6 +1773,7 @@ tf_cuda_library( ) + if_mkl( [ "//third_party/mkl:intel_binary_blob", + "@mkl_dnn//:mkl_dnn", ], ), alwayslink = 1, @@ -1933,7 +1934,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/visitable_allocator.h", "graph/gradients.h", "graph/quantize_training.h", -] +] + if_mkl(["graph/mkl_graph_util.h"]) tf_cuda_library( name = "core_cpu_impl", @@ -2034,7 +2035,10 @@ tf_cuda_library( "//third_party/eigen3", "//tensorflow/core/kernels:required", ] + if_mkl( - ["//third_party/mkl:intel_binary_blob"], + [ + "//third_party/mkl:intel_binary_blob", + "@mkl_dnn//:mkl_dnn", + ], ) + tf_additional_core_deps() + if_static([":core_cpu_impl"]), alwayslink = 1, ) @@ -2670,7 +2674,7 @@ tf_cc_test_mkl( "graph/mkl_layout_pass_test.cc", "graph/mkl_tfconversion_pass_test.cc", ], - linkstatic = tf_kernel_tests_linkstatic(), + linkstatic = 1, deps = [ ":core", ":core_cpu", @@ -2688,18 +2692,6 @@ tf_cc_test_mkl( "//tensorflow/cc:cc_ops", "//tensorflow/cc:scope", "//tensorflow/cc:sendrecv_ops", - "//tensorflow/core/kernels:mkl_aggregate_ops", - "//tensorflow/core/kernels:mkl_concat_op", - "//tensorflow/core/kernels:mkl_conv_op", - "//tensorflow/core/kernels:mkl_cwise_ops_common", - "//tensorflow/core/kernels:mkl_fused_batch_norm_op", - "//tensorflow/core/kernels:mkl_identity_op", - "//tensorflow/core/kernels:mkl_input_conversion_op", - "//tensorflow/core/kernels:mkl_lrn_op", - "//tensorflow/core/kernels:mkl_pooling_ops", - "//tensorflow/core/kernels:mkl_relu_op", - "//tensorflow/core/kernels:mkl_reshape_op", - "//tensorflow/core/kernels:mkl_tfconv_op", "//tensorflow/core/kernels:ops_util", "//third_party/eigen3", ], diff 
--git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h new file mode 100644 index 0000000000..cb32d64334 --- /dev/null +++ b/tensorflow/core/graph/mkl_graph_util.h @@ -0,0 +1,128 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_ +#define TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_ +#ifdef INTEL_MKL + +#include <string> +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +// Since our ops are going to produce and also consume N additional tensors +// (Mkl) for N Tensorflow tensors, we can have following different +// orderings among these 2N tensors. +// +// E.g., for Tensorflow tensors A, B, and C, our ops will produce and +// consume A_m, B_m, and C_m additionally. +// +// INTERLEAVED: in this case 2N tensors are interleaved. So for above +// example, the ordering looks like: A, A_m, B, B_m, C, C_m. +// +// CONTIGUOUS: in this case N Tensorflow tensors are contiguous followed +// by N Mkl tensors. So for above example, the ordering looks +// like: A, B, C, A_m, B_m, C_m +// +// Following APIs map index of original Tensorflow tensors to their +// appropriate position based on selected ordering. For contiguous ordering, +// we need to know the total number of tensors (parameter total). 
+// +typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering; +// NOTE: Currently, we use contiguous ordering. If you change this, then you +// would need to change Mkl op definitions in nn_ops.cc. +static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS; + +// Get index of MetaData tensor from index 'n' of Data tensor. +inline int DataIndexToMetaDataIndex(int n, int total_tensors) { + if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { + // For interleaved ordering, Mkl tensor follows immediately after + // Tensorflow tensor. + return n + 1; + } else { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away. + return n + total_tensors / 2; + } +} + +int inline GetTensorDataIndex(int n, int total_tensors) { + if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { + return 2 * n; // index corresponding to nth input/output tensor + } else { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + return n; + } +} + +int inline GetTensorMetaDataIndex(int n, int total_tensors) { + // Get index for TensorData first and then use mapping function + // to get TensorMetaData index from TensorData index. + int tidx = GetTensorDataIndex(n, total_tensors); + return DataIndexToMetaDataIndex(tidx, total_tensors); +} + +namespace mkl_op_registry { +static const char* kMklOpLabel = "MklOp"; +static const char* kMklOpLabelPattern = "label='MklOp'"; + +// Get the name of Mkl op from original TensorFlow op +// We prefix 'Mkl' to the original op to get Mkl op. +inline string GetMklOpName(const string& name) { + // Prefix that we add to Tensorflow op name to construct Mkl op name. + const char* const kMklOpPrefix = "_Mkl"; + return string(kMklOpPrefix) + name; +} + +// Check whether opname with type T is registered as MKL-compliant. 
+// +// @input: name of the op +// @input: T datatype to be used for checking op +// @return: true if opname is registered as Mkl op; false otherwise +static inline bool IsMklOp(const std::string& op_name, DataType T) { + string kernel = KernelsRegisteredForOp(op_name); + bool result = + kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT); + if (result) { + VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel; + } + return result; +} + +// Check whether opname with type T is registered as MKL-compliant and +// is element-wise. +// +// @input: name of the op +// @input: T datatype to be used for checking op +// @return: true if opname is registered as element-wise Mkl op; +// false otherwise +static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) { + if (!IsMklOp(op_name, T)) { + return false; + } + + bool result = (0 == op_name.compare(GetMklOpName("Add")) || + 0 == op_name.compare(GetMklOpName("Sub")) || + 0 == op_name.compare(GetMklOpName("Mul")) || + 0 == op_name.compare(GetMklOpName("Maximum")) || + 0 == op_name.compare(GetMklOpName("SquaredDifference"))); + + VLOG(1) << "mkl_op_registry::" << op_name + << " is elementwise MKL op: " << result; + return result; +} +} // namespace mkl_op_registry +} // namespace tensorflow +#endif // INTEL_MKL +#endif // TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_ diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 90377e54c7..f87a94a76a 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -37,8 +37,8 @@ limitations under the License. 
#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/util/tensor_format.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/graph/mkl_layout_pass.h" -#include "tensorflow/core/util/mkl_util.h" namespace tensorflow { diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 6a41e3965a..a2b2f6530d 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -16,7 +16,7 @@ limitations under the License. #ifdef INTEL_MKL #include "tensorflow/core/graph/mkl_layout_pass.h" -#include "tensorflow/core/util/mkl_util.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #include #include diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc index 3f8b0e86d0..fe4588389e 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc @@ -33,8 +33,8 @@ limitations under the License. #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/graph/mkl_tfconversion_pass.h" -#include "tensorflow/core/util/mkl_util.h" namespace tensorflow { diff --git a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc index b01818f746..bbdbe78bbd 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc @@ -16,7 +16,7 @@ limitations under the License. 
#ifdef INTEL_MKL #include "tensorflow/core/graph/mkl_tfconversion_pass.h" -#include "tensorflow/core/util/mkl_util.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #include #include diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 36fbf6b023..bdc6faefbc 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -820,6 +820,7 @@ tf_kernel_library( hdrs = ["transpose_op.h"], deps = ARRAY_DEPS + if_mkl([ "//third_party/mkl:intel_binary_blob", + "@mkl_dnn//:mkl_dnn", ]), ) @@ -2596,6 +2597,7 @@ tf_kernel_library( "//conditions:default": [], }) + if_mkl([ "//third_party/mkl:intel_binary_blob", + "@mkl_dnn//:mkl_dnn", ]) + if_cuda([ "//tensorflow/core/platform/default/build_config:cublas_plugin", ]), @@ -5501,8 +5503,10 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", + ] + if_mkl([ "//third_party/mkl:intel_binary_blob", - ], + "@mkl_dnn//:mkl_dnn", + ]), ) tf_mkl_kernel_library( @@ -5516,8 +5520,10 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", + ] + if_mkl([ "//third_party/mkl:intel_binary_blob", - ], + "@mkl_dnn//:mkl_dnn", + ]), ) tf_mkl_kernel_library( @@ -5566,16 +5572,19 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", + ] + if_mkl([ "//third_party/mkl:intel_binary_blob", - ], + "@mkl_dnn//:mkl_dnn", + ]), ) tf_mkl_kernel_library( name = "mkl_fused_batch_norm_op", srcs = ["mkl_fused_batch_norm_op.cc"], - deps = NN_DEPS + [ + deps = NN_DEPS + if_mkl([ "//third_party/mkl:intel_binary_blob", - ], + "@mkl_dnn//:mkl_dnn", + ]), ) tf_mkl_kernel_library( @@ -5589,9 +5598,10 @@ tf_mkl_kernel_library( tf_mkl_kernel_library( name = "mkl_concat_op", prefix = "mkl_concat_op", - deps = ARRAY_DEPS + [ + deps = ARRAY_DEPS + if_mkl([ "//third_party/mkl:intel_binary_blob", - ], + "@mkl_dnn//:mkl_dnn", + ]), ) 
tf_mkl_kernel_library( @@ -5605,17 +5615,19 @@ tf_mkl_kernel_library( tf_mkl_kernel_library( name = "mkl_identity_op", prefix = "mkl_identity_op", - deps = ARRAY_DEPS + [ + deps = ARRAY_DEPS + if_mkl([ "//third_party/mkl:intel_binary_blob", - ], + "@mkl_dnn//:mkl_dnn", + ]), ) tf_mkl_kernel_library( name = "mkl_lrn_op", prefix = "mkl_lrn_op", - deps = NN_DEPS + [ + deps = NN_DEPS + if_mkl([ "//third_party/mkl:intel_binary_blob", - ], + "@mkl_dnn//:mkl_dnn", + ]), ) tf_mkl_kernel_library( diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc index 1bdfafb89b..368993c827 100644 --- a/tensorflow/core/kernels/bias_op.cc +++ b/tensorflow/core/kernels/bias_op.cc @@ -39,6 +39,48 @@ typedef Eigen::GpuDevice GPUDevice; typedef Eigen::SyclDevice SYCLDevice; #endif // TENSORFLOW_USE_SYCL +namespace { + +void GetBiasValueDims(const Tensor& value_tensor, TensorFormat data_format, + int32* batch, int32* height, int32* width, + int32* channel) { + *batch = 1; + *width = 1; + *height = 1; + *channel = 1; + if (data_format == FORMAT_NHWC) { + int32 channel_dim = value_tensor.dims() - 1; + *channel = static_cast(value_tensor.dim_size(channel_dim)); + for (int32 i = 0; i < channel_dim; i++) { + *batch *= static_cast(value_tensor.dim_size(i)); + } + } else if (data_format == FORMAT_NCHW) { + int32 channel_dim = value_tensor.dims() - 3; + int32 height_dim = value_tensor.dims() - 2; + int32 width_dim = value_tensor.dims() - 1; + *channel = static_cast(value_tensor.dim_size(channel_dim)); + *height = static_cast(value_tensor.dim_size(height_dim)); + *width = static_cast(value_tensor.dim_size(width_dim)); + for (int32 i = 0; i < channel_dim; i++) { + *batch *= static_cast(value_tensor.dim_size(i)); + } + } +} + +template +struct AccumulatorType { + typedef T type; +}; + +// float is faster on the CPU than half, and also more precise, +// so use float for the temporary accumulators. 
+template <> +struct AccumulatorType { + typedef float type; +}; + +} // namespace + template class BiasOp : public BinaryOp { public: @@ -50,9 +92,6 @@ class BiasOp : public BinaryOp { } else { data_format_ = FORMAT_NHWC; } - OP_REQUIRES(context, data_format_ == FORMAT_NHWC, - errors::InvalidArgument(context->device()->name() + - " BiasOp only supports NHWC.")); } void Compute(OpKernelContext* context) override { @@ -65,9 +104,21 @@ class BiasOp : public BinaryOp { OP_REQUIRES(context, TensorShapeUtils::IsVector(bias.shape()), errors::InvalidArgument("Biases must be 1D: ", bias.shape().DebugString())); - const auto last_dim = input.shape().dims() - 1; + + // Added by intel_tf to support NCHW on CPU regardless of MKL used or not. + size_t channel_dim; + if (data_format_ == FORMAT_NCHW) { + OP_REQUIRES(context, input.dims() == 4, + errors::InvalidArgument( + "NCHW format supports only 4D input tensor.")); + channel_dim = 1; + } else { + channel_dim = input.shape().dims() - 1; // End of code by intel_tf. + } + OP_REQUIRES( - context, bias.shape().dim_size(0) == input.shape().dim_size(last_dim), + context, + bias.shape().dim_size(0) == input.shape().dim_size(channel_dim), errors::InvalidArgument( "Must provide as many biases as the last dimension " "of the input tensor: ", @@ -78,6 +129,19 @@ class BiasOp : public BinaryOp { {0}, 0, input.shape(), &output)); if (input.NumElements() == 0) return; + // Added by intel_tf to support NCHW on CPU regardless of MKL used or not. + if (data_format_ == FORMAT_NCHW) { + int32 batch, height, width, channel; + GetBiasValueDims(input, data_format_, &batch, &height, &width, &channel); + Eigen::DSizes four_dims(1, channel, 1, 1); + Eigen::DSizes broad_cast_dims(batch, 1, height, width); + const Device& d = context->eigen_device(); + output->tensor().device(d) = + input.tensor() + + bias.tensor().reshape(four_dims).broadcast(broad_cast_dims); + return; + } // End of code by intel_tf. 
+ switch (input.shape().dims()) { case 2: Compute<2>(context, input, bias, output); @@ -137,48 +201,6 @@ REGISTER_KERNEL(double); #undef REGISTER_KERNEL #endif // TENSORFLOW_USE_SYCL -namespace { - -void GetBiasValueDims(const Tensor& value_tensor, TensorFormat data_format, - int32* batch, int32* height, int32* width, - int32* channel) { - *batch = 1; - *width = 1; - *height = 1; - *channel = 1; - if (data_format == FORMAT_NHWC) { - int32 channel_dim = value_tensor.dims() - 1; - *channel = static_cast(value_tensor.dim_size(channel_dim)); - for (int32 i = 0; i < channel_dim; i++) { - *batch *= static_cast(value_tensor.dim_size(i)); - } - } else if (data_format == FORMAT_NCHW) { - int32 channel_dim = value_tensor.dims() - 3; - int32 height_dim = value_tensor.dims() - 2; - int32 width_dim = value_tensor.dims() - 1; - *channel = static_cast(value_tensor.dim_size(channel_dim)); - *height = static_cast(value_tensor.dim_size(height_dim)); - *width = static_cast(value_tensor.dim_size(width_dim)); - for (int32 i = 0; i < channel_dim; i++) { - *batch *= static_cast(value_tensor.dim_size(i)); - } - } -} - -template -struct AccumulatorType { - typedef T type; -}; - -// float is faster on the CPU than half, and also more precise, -// so use float for the temporary accumulators. 
-template <> -struct AccumulatorType { - typedef float type; -}; - -} // namespace - template class BiasGradOp : public OpKernel { public: @@ -190,9 +212,6 @@ class BiasGradOp : public OpKernel { } else { data_format_ = FORMAT_NHWC; } - OP_REQUIRES(context, data_format_ == FORMAT_NHWC, - errors::InvalidArgument(context->device()->name() + - " BiasGradOp only supports NHWC.")); } void Compute(OpKernelContext* context) override { @@ -222,18 +241,40 @@ class BiasGradOp : public OpKernel { // Eigen often crashes by design on empty tensors, but setZero is safe output->template flat().setZero(); } else { - Eigen::DSizes two_dims(batch * height * width, channel); + // Added by intel_tf to support NCHW on CPU regardless of MKL used or not. + if (data_format_ == FORMAT_NCHW) { + OP_REQUIRES(context, output_backprop.dims() == 4, + errors::InvalidArgument( + "NCHW format supports only 4D input/output tensor.")); + Eigen::DSizes four_dims(batch, channel, height, width); +#ifdef EIGEN_HAS_INDEX_LIST + using idx0 = Eigen::type2index<0>; + using idx2 = Eigen::type2index<2>; + using idx3 = Eigen::type2index<3>; + Eigen::IndexList reduction_axes; +#else + Eigen::array reduction_axes = {0, 2, 3}; +#endif + output->template flat().device(context->eigen_device()) = + output_backprop.flat() + .template cast::type>() + .reshape(four_dims) + .sum(reduction_axes) + .template cast(); // End of code by intel_tf. 
+ } else { + Eigen::DSizes two_dims(batch * height * width, channel); #ifdef EIGEN_HAS_INDEX_LIST - Eigen::IndexList > reduction_axis; + Eigen::IndexList > reduction_axis; #else - Eigen::array reduction_axis = {0}; + Eigen::array reduction_axis = {0}; #endif - output->template flat().device(context->eigen_device()) = - output_backprop.flat() - .template cast::type>() - .reshape(two_dims) - .sum(reduction_axis) - .template cast(); + output->template flat().device(context->eigen_device()) = + output_backprop.flat() + .template cast::type>() + .reshape(two_dims) + .sum(reduction_axis) + .template cast(); + } } } diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index 641077ca65..5e09963d2d 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -816,40 +816,35 @@ void LaunchConv2DBackpropFilterOp::operator()( AlgorithmConfig algorithm_config; if (cudnn_use_autotune && !AutoTuneConvBwdFilter::GetInstance()->Find( conv_parameters, &algorithm_config)) { - std::vector algorithms; + std::vector algorithms; CHECK(stream->parent()->GetConvolveBackwardFilterAlgorithms( conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; - // TODO(benbarsdell): Ideally this should not attempt using tensor op math - // if it's not enabled. - for (bool use_tensor_ops : {false, true}) { - for (auto algo_index : algorithms) { - // TODO(zhengxq): profile each algorithm multiple times to better - // accuracy. 
- AlgorithmDesc profile_algorithm(algo_index, use_tensor_ops); - CudnnScratchAllocator scratch_allocator( - ConvolveBackwardFilterScratchSize, ctx); - ProfileResult profile_result; - bool cudnn_launch_status = - stream - ->ThenConvolveBackwardFilterWithAlgorithm( - input_desc, input_ptr, output_desc, out_backprop_ptr, - conv_desc, filter_desc, &filter_backprop_ptr, - &scratch_allocator, AlgorithmConfig(profile_algorithm), - &profile_result) - .ok(); - if (cudnn_launch_status) { - if (profile_result.is_valid()) { - if (profile_result.elapsed_time_in_ms() < - best_result.elapsed_time_in_ms()) { - best_result = profile_result; - } - if (scratch_allocator.TotalByteSize() == 0 && - profile_result.elapsed_time_in_ms() < - best_result_no_scratch.elapsed_time_in_ms()) { - best_result_no_scratch = profile_result; - } + for (auto profile_algorithm : algorithms) { + // TODO(zhengxq): profile each algorithm multiple times to better + // accuracy. + CudnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize, + ctx); + ProfileResult profile_result; + bool cudnn_launch_status = + stream + ->ThenConvolveBackwardFilterWithAlgorithm( + input_desc, input_ptr, output_desc, out_backprop_ptr, + conv_desc, filter_desc, &filter_backprop_ptr, + &scratch_allocator, AlgorithmConfig(profile_algorithm), + &profile_result) + .ok(); + if (cudnn_launch_status) { + if (profile_result.is_valid()) { + if (profile_result.elapsed_time_in_ms() < + best_result.elapsed_time_in_ms()) { + best_result = profile_result; + } + if (scratch_allocator.TotalByteSize() == 0 && + profile_result.elapsed_time_in_ms() < + best_result_no_scratch.elapsed_time_in_ms()) { + best_result_no_scratch = profile_result; } } } diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 0732bf4046..0b2d01afa9 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -870,39 +870,34 @@ void 
LaunchConv2DBackpropInputOp::operator()( AlgorithmConfig algorithm_config; if (cudnn_use_autotune && !AutoTuneConvBwdData::GetInstance()->Find( conv_parameters, &algorithm_config)) { - std::vector algorithms; + std::vector algorithms; CHECK(stream->parent()->GetConvolveBackwardDataAlgorithms( conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; - // TODO(benbarsdell): Ideally this should not attempt using tensor op math - // if it's not enabled. - for (bool use_tensor_ops : {false, true}) { - for (auto algo_index : algorithms) { - // TODO(zhengxq): profile each algorithm multiple times to better - // accuracy. - AlgorithmDesc profile_algorithm(algo_index, use_tensor_ops); - CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, - ctx); - ProfileResult profile_result; - bool cudnn_launch_status = - stream - ->ThenConvolveBackwardDataWithAlgorithm( - filter_desc, filter_ptr, output_desc, out_backprop_ptr, - conv_desc, input_desc, &in_backprop_ptr, &scratch_allocator, - AlgorithmConfig(profile_algorithm), &profile_result) - .ok(); - if (cudnn_launch_status) { - if (profile_result.is_valid()) { - if (profile_result.elapsed_time_in_ms() < - best_result.elapsed_time_in_ms()) { - best_result = profile_result; - } - if (scratch_allocator.TotalByteSize() == 0 && - profile_result.elapsed_time_in_ms() < - best_result_no_scratch.elapsed_time_in_ms()) { - best_result_no_scratch = profile_result; - } + for (auto profile_algorithm : algorithms) { + // TODO(zhengxq): profile each algorithm multiple times to better + // accuracy. 
+ CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, + ctx); + ProfileResult profile_result; + bool cudnn_launch_status = + stream + ->ThenConvolveBackwardDataWithAlgorithm( + filter_desc, filter_ptr, output_desc, out_backprop_ptr, + conv_desc, input_desc, &in_backprop_ptr, &scratch_allocator, + AlgorithmConfig(profile_algorithm), &profile_result) + .ok(); + if (cudnn_launch_status) { + if (profile_result.is_valid()) { + if (profile_result.elapsed_time_in_ms() < + best_result.elapsed_time_in_ms()) { + best_result = profile_result; + } + if (scratch_allocator.TotalByteSize() == 0 && + profile_result.elapsed_time_in_ms() < + best_result_no_scratch.elapsed_time_in_ms()) { + best_result_no_scratch = profile_result; } } } diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 8ad56053a8..21f5cb1716 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -654,40 +654,34 @@ class Conv3DBackpropInputOp : public OpKernel { AlgorithmConfig algorithm_config; if (cudnn_use_autotune_ && !AutoTuneConv3dBwdData::GetInstance()->Find( conv_parameters, &algorithm_config)) { - std::vector algorithms; + std::vector algorithms; CHECK(stream->parent()->GetConvolveBackwardDataAlgorithms( conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; - // TODO(benbarsdell): Ideally this should not attempt using tensor op math - // if it's not enabled. - for (bool use_tensor_ops : {false, true}) { - for (auto algo_index : algorithms) { - // TODO(zhengxq): profile each algorithm multiple times to better - // accuracy. 
- AlgorithmDesc profile_algorithm(algo_index, use_tensor_ops); - CudnnScratchAllocator scratch_allocator( - ConvolveBackwardDataScratchSize, context); - ProfileResult profile_result; - bool cudnn_launch_status = - stream - ->ThenConvolveBackwardDataWithAlgorithm( - filter_desc, filter_ptr, output_desc, out_backprop_ptr, - conv_desc, input_desc, &in_backprop_ptr, - &scratch_allocator, AlgorithmConfig(profile_algorithm), - &profile_result) - .ok(); - if (cudnn_launch_status) { - if (profile_result.is_valid()) { - if (profile_result.elapsed_time_in_ms() < - best_result.elapsed_time_in_ms()) { - best_result = profile_result; - } - if (scratch_allocator.TotalByteSize() == 0 && - profile_result.elapsed_time_in_ms() < - best_result_no_scratch.elapsed_time_in_ms()) { - best_result_no_scratch = profile_result; - } + for (auto profile_algorithm : algorithms) { + // TODO(zhengxq): profile each algorithm multiple times to better + // accuracy. + CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, + context); + ProfileResult profile_result; + bool cudnn_launch_status = + stream + ->ThenConvolveBackwardDataWithAlgorithm( + filter_desc, filter_ptr, output_desc, out_backprop_ptr, + conv_desc, input_desc, &in_backprop_ptr, &scratch_allocator, + AlgorithmConfig(profile_algorithm), &profile_result) + .ok(); + if (cudnn_launch_status) { + if (profile_result.is_valid()) { + if (profile_result.elapsed_time_in_ms() < + best_result.elapsed_time_in_ms()) { + best_result = profile_result; + } + if (scratch_allocator.TotalByteSize() == 0 && + profile_result.elapsed_time_in_ms() < + best_result_no_scratch.elapsed_time_in_ms()) { + best_result_no_scratch = profile_result; } } } @@ -1026,40 +1020,35 @@ class Conv3DBackpropFilterOp : public OpKernel { AlgorithmConfig algorithm_config; if (cudnn_use_autotune_ && !AutoTuneConv3dBwdFilter::GetInstance()->Find( conv_parameters, &algorithm_config)) { - std::vector algorithms; + std::vector algorithms; 
CHECK(stream->parent()->GetConvolveBackwardFilterAlgorithms( conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; - // TODO(benbarsdell): Ideally this should not attempt using tensor op math - // if it's not enabled. - for (bool use_tensor_ops : {false, true}) { - for (auto algo_index : algorithms) { - // TODO(zhengxq): profile each algorithm multiple times to better - // accuracy. - AlgorithmDesc profile_algorithm(algo_index, use_tensor_ops); - CudnnScratchAllocator scratch_allocator( - ConvolveBackwardFilterScratchSize, context); - ProfileResult profile_result; - bool cudnn_launch_status = - stream - ->ThenConvolveBackwardFilterWithAlgorithm( - input_desc, input_ptr, output_desc, out_backprop_ptr, - conv_desc, filter_desc, &filter_backprop_ptr, - &scratch_allocator, AlgorithmConfig(profile_algorithm), - &profile_result) - .ok(); - if (cudnn_launch_status) { - if (profile_result.is_valid()) { - if (profile_result.elapsed_time_in_ms() < - best_result.elapsed_time_in_ms()) { - best_result = profile_result; - } - if (scratch_allocator.TotalByteSize() == 0 && - profile_result.elapsed_time_in_ms() < - best_result_no_scratch.elapsed_time_in_ms()) { - best_result_no_scratch = profile_result; - } + for (auto profile_algorithm : algorithms) { + // TODO(zhengxq): profile each algorithm multiple times to better + // accuracy. 
+ CudnnScratchAllocator scratch_allocator( + ConvolveBackwardFilterScratchSize, context); + ProfileResult profile_result; + bool cudnn_launch_status = + stream + ->ThenConvolveBackwardFilterWithAlgorithm( + input_desc, input_ptr, output_desc, out_backprop_ptr, + conv_desc, filter_desc, &filter_backprop_ptr, + &scratch_allocator, AlgorithmConfig(profile_algorithm), + &profile_result) + .ok(); + if (cudnn_launch_status) { + if (profile_result.is_valid()) { + if (profile_result.elapsed_time_in_ms() < + best_result.elapsed_time_in_ms()) { + best_result = profile_result; + } + if (scratch_allocator.TotalByteSize() == 0 && + profile_result.elapsed_time_in_ms() < + best_result_no_scratch.elapsed_time_in_ms()) { + best_result_no_scratch = profile_result; } } } diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index dc03eeb658..bb67113fb0 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -662,38 +662,33 @@ void LaunchConv2DOp::operator()( AlgorithmConfig algorithm_config; if (cudnn_use_autotune && !AutoTuneConv::GetInstance()->Find(conv_parameters, &algorithm_config)) { - std::vector algorithms; + std::vector algorithms; CHECK(stream->parent()->GetConvolveAlgorithms( conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; - // TODO(benbarsdell): Ideally this should not attempt using tensor op math - // if it's not enabled. - for (bool use_tensor_ops : {false, true}) { - for (auto algo_index : algorithms) { - // TODO(zhengxq): profile each algorithm multiple times to better - // accuracy. 
- AlgorithmDesc profile_algorithm(algo_index, use_tensor_ops); - CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx); - ProfileResult profile_result; - bool cudnn_launch_status = - stream - ->ThenConvolveWithAlgorithm( - input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, - output_desc, &output_ptr, &scratch_allocator, - AlgorithmConfig(profile_algorithm), &profile_result) - .ok(); - if (cudnn_launch_status) { - if (profile_result.is_valid()) { - if (profile_result.elapsed_time_in_ms() < - best_result.elapsed_time_in_ms()) { - best_result = profile_result; - } - if (scratch_allocator.TotalByteSize() == 0 && - profile_result.elapsed_time_in_ms() < - best_result_no_scratch.elapsed_time_in_ms()) { - best_result_no_scratch = profile_result; - } + for (auto profile_algorithm : algorithms) { + // TODO(zhengxq): profile each algorithm multiple times to better + // accuracy. + CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx); + ProfileResult profile_result; + bool cudnn_launch_status = + stream + ->ThenConvolveWithAlgorithm( + input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, + output_desc, &output_ptr, &scratch_allocator, + AlgorithmConfig(profile_algorithm), &profile_result) + .ok(); + if (cudnn_launch_status) { + if (profile_result.is_valid()) { + if (profile_result.elapsed_time_in_ms() < + best_result.elapsed_time_in_ms()) { + best_result = profile_result; + } + if (scratch_allocator.TotalByteSize() == 0 && + profile_result.elapsed_time_in_ms() < + best_result_no_scratch.elapsed_time_in_ms()) { + best_result_no_scratch = profile_result; } } } diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc index 72758f707a..8a89d564de 100644 --- a/tensorflow/core/kernels/conv_ops_3d.cc +++ b/tensorflow/core/kernels/conv_ops_3d.cc @@ -390,38 +390,33 @@ struct LaunchConvOp { if (cudnn_use_autotune && !AutoTuneConv3d::GetInstance()->Find( conv_parameters, &algorithm_config)) { - std::vector 
algorithms; + std::vector algorithms; CHECK(stream->parent()->GetConvolveAlgorithms( conv_parameters.ShouldIncludeWinogradNonfusedAlgo(), &algorithms)); ProfileResult best_result; ProfileResult best_result_no_scratch; - // TODO(benbarsdell): Ideally this should not attempt using tensor op math - // if it's not enabled. - for (bool use_tensor_ops : {false, true}) { - for (auto algo_index : algorithms) { - AlgorithmDesc profile_algorithm(algo_index, use_tensor_ops); - // TODO(zhengxq): profile each algorithm multiple times to better - // accuracy. - CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx); - ProfileResult profile_result; - bool cudnn_launch_status = - stream - ->ThenConvolveWithAlgorithm( - input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, - output_desc, &output_ptr, &scratch_allocator, - AlgorithmConfig(profile_algorithm), &profile_result) - .ok(); - if (cudnn_launch_status) { - if (profile_result.is_valid()) { - if (profile_result.elapsed_time_in_ms() < - best_result.elapsed_time_in_ms()) { - best_result = profile_result; - } - if (scratch_allocator.TotalByteSize() == 0 && - profile_result.elapsed_time_in_ms() < - best_result_no_scratch.elapsed_time_in_ms()) { - best_result_no_scratch = profile_result; - } + for (auto profile_algorithm : algorithms) { + // TODO(zhengxq): profile each algorithm multiple times to better + // accuracy. 
+ CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx); + ProfileResult profile_result; + bool cudnn_launch_status = + stream + ->ThenConvolveWithAlgorithm( + input_desc, input_ptr, filter_desc, filter_ptr, conv_desc, + output_desc, &output_ptr, &scratch_allocator, + AlgorithmConfig(profile_algorithm), &profile_result) + .ok(); + if (cudnn_launch_status) { + if (profile_result.is_valid()) { + if (profile_result.elapsed_time_in_ms() < + best_result.elapsed_time_in_ms()) { + best_result = profile_result; + } + if (scratch_allocator.TotalByteSize() == 0 && + profile_result.elapsed_time_in_ms() < + best_result_no_scratch.elapsed_time_in_ms()) { + best_result_no_scratch = profile_result; } } } diff --git a/tensorflow/core/kernels/decode_csv_op.cc b/tensorflow/core/kernels/decode_csv_op.cc index 42ea23553b..5e48ae9766 100644 --- a/tensorflow/core/kernels/decode_csv_op.cc +++ b/tensorflow/core/kernels/decode_csv_op.cc @@ -36,8 +36,8 @@ class DecodeCSVOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->GetAttr("use_quote_delim", &use_quote_delim_)); OP_REQUIRES(ctx, delim.size() == 1, errors::InvalidArgument("field_delim should be only 1 char")); - delim_ = delim[0]; + OP_REQUIRES_OK(ctx, ctx->GetAttr("na_value", &na_value_)); } void Compute(OpKernelContext* ctx) override { @@ -79,9 +79,9 @@ class DecodeCSVOp : public OpKernel { const DataType& dtype = out_type_[f]; switch (dtype) { case DT_INT32: { - // If this field is empty, check if default is given: + // If this field is empty or NA value, check if default is given: // If yes, use default value; Otherwise report error. 
- if (fields[f].empty()) { + if (fields[f].empty() || fields[f] == na_value_) { OP_REQUIRES(ctx, record_defaults[f].NumElements() == 1, errors::InvalidArgument( "Field ", f, @@ -99,9 +99,9 @@ class DecodeCSVOp : public OpKernel { break; } case DT_INT64: { - // If this field is empty, check if default is given: + // If this field is empty or NA value, check if default is given: // If yes, use default value; Otherwise report error. - if (fields[f].empty()) { + if (fields[f].empty() || fields[f] == na_value_) { OP_REQUIRES(ctx, record_defaults[f].NumElements() == 1, errors::InvalidArgument( "Field ", f, @@ -119,9 +119,9 @@ class DecodeCSVOp : public OpKernel { break; } case DT_FLOAT: { - // If this field is empty, check if default is given: + // If this field is empty or NA value, check if default is given: // If yes, use default value; Otherwise report error. - if (fields[f].empty()) { + if (fields[f].empty() || fields[f] == na_value_) { OP_REQUIRES(ctx, record_defaults[f].NumElements() == 1, errors::InvalidArgument( "Field ", f, @@ -138,9 +138,9 @@ class DecodeCSVOp : public OpKernel { break; } case DT_STRING: { - // If this field is empty, check if default is given: + // If this field is empty or NA value, check if default is given: // If yes, use default value; Otherwise report error. 
- if (fields[f].empty()) { + if (fields[f].empty() || fields[f] == na_value_) { OP_REQUIRES(ctx, record_defaults[f].NumElements() == 1, errors::InvalidArgument( "Field ", f, @@ -165,6 +165,7 @@ class DecodeCSVOp : public OpKernel { std::vector out_type_; char delim_; bool use_quote_delim_; + string na_value_; void ExtractFields(OpKernelContext* ctx, StringPiece input, std::vector* result) { diff --git a/tensorflow/core/kernels/dense_to_sparse_batch_dataset_op.cc b/tensorflow/core/kernels/dense_to_sparse_batch_dataset_op.cc index 25a6813d59..0174c8dfc8 100644 --- a/tensorflow/core/kernels/dense_to_sparse_batch_dataset_op.cc +++ b/tensorflow/core/kernels/dense_to_sparse_batch_dataset_op.cc @@ -49,10 +49,10 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel { OP_REQUIRES_OK(ctx, ctx->input("row_shape", &row_shape_t)); OP_REQUIRES(ctx, TensorShapeUtils::IsVector(row_shape_t->shape()), errors::InvalidArgument("row_shape must be a vector")); - TensorShape row_shape; - for (size_t i = 0; i < row_shape_t->dim_size(0); ++i) { - row_shape.AddDim(row_shape_t->vec()(i)); - } + PartialTensorShape row_shape; + OP_REQUIRES_OK(ctx, PartialTensorShape::MakePartialShape( + row_shape_t->vec().data(), + row_shape_t->NumElements(), &row_shape)); *output = nullptr; @@ -78,7 +78,7 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel { template class Dataset : public DatasetBase { public: - Dataset(int64 batch_size, const TensorShape& row_shape, + Dataset(int64 batch_size, const PartialTensorShape& row_shape, const DatasetBase* input) : batch_size_(batch_size), row_shape_(row_shape), input_(input) { input_->Ref(); @@ -129,9 +129,22 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel { int64 total_elements = 0; batch_elements.reserve( DatasetIterator>::dataset()->batch_size_); - const TensorShape& row_shape = + const PartialTensorShape& row_shape = DatasetIterator>::dataset()->row_shape_; const int row_ndims = row_shape.dims(); + + // Determine 
the size of the output tensors: + // * dense_shape will be [`row_shape + 1`]. + Tensor dense_shape(cpu_allocator(), DT_INT64, {row_ndims + 1}); + auto dense_shape_vec = dense_shape.vec(); + for (size_t i = 0; i < row_ndims; ++i) { + if (row_shape.dim_size(i) == -1) { + dense_shape_vec(i + 1) = 0; + } else { + dense_shape_vec(i + 1) = row_shape.dim_size(i); + } + } + { mutex_lock l(mu_); *end_of_sequence = false; @@ -156,9 +169,14 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel { ") that is incompatible with the row shape (", row_shape.DebugString(), ")."); } - for (int i = 0; i < row_ndims; ++i) { - if (batch_element_tuple[0].shape().dim_size(i) > - row_shape.dim_size(i)) { + for (int j = 0; j < row_ndims; ++j) { + // Take the maximum in the dimension if -1 is given. + if (row_shape.dim_size(j) == -1) { + dense_shape_vec(j + 1) = + std::max(batch_element_tuple[0].dim_size(j), + dense_shape_vec(j + 1)); + } else if (batch_element_tuple[0].dim_size(j) > + row_shape.dim_size(j)) { return errors::DataLoss( "Input element had shape (", batch_element_tuple[0].shape().DebugString(), @@ -175,20 +193,16 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - // Determine the size of the output tensors: // * indices will be [`total_elements`, `row_shape + 1`]. // * values will be [`total_elements`]. - // * dense_shape will be [`row_shape + 1`]. 
Tensor indices(cpu_allocator(), DT_INT64, {total_elements, row_ndims + 1}); Tensor values( cpu_allocator(), DatasetIterator>::dataset()->output_dtypes()[1], {total_elements}); - Tensor dense_shape(cpu_allocator(), DT_INT64, {row_ndims + 1}); auto indices_matrix = indices.matrix(); auto values_flat = values.flat(); - auto dense_shape_vec = dense_shape.vec(); int64 current_position_in_values = 0; for (int64 i = 0; i < batch_elements.size(); ++i) { @@ -220,9 +234,6 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel { } dense_shape_vec(0) = batch_elements.size(); - for (size_t i = 0; i < row_ndims; ++i) { - dense_shape_vec(i + 1) = row_shape.dim_size(i); - } out_tensors->push_back(std::move(indices)); out_tensors->push_back(std::move(values)); @@ -239,7 +250,7 @@ class DenseToSparseBatchDatasetOp : public UnaryDatasetOpKernel { }; const int64 batch_size_; - const TensorShape row_shape_; + const PartialTensorShape row_shape_; const DatasetBase* const input_; std::vector output_shapes_; }; diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index f81a448e51..9080bf7be8 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" #include "tensorflow/core/kernels/conv_grad_ops.h" +#include "tensorflow/core/kernels/mkl_conv_ops.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -41,10 +42,24 @@ limitations under the License. 
#include "mkl_dnn.h" #include "mkl_dnn_types.h" +#ifdef INTEL_MKL_DNN +#include "mkldnn.hpp" + +using mkldnn::prop_kind; +using mkldnn::stream; + +using mkldnn::convolution_backward_weights; +using mkldnn::convolution_direct; +using mkldnn::convolution_forward; + +#endif + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; +#ifndef INTEL_MKL_DNN + template class MklConv2DCustomBackpropFilterOp : public OpKernel { public: @@ -411,6 +426,172 @@ class MklConv2DCustomBackpropFilterOp : public OpKernel { TensorFormat data_format_; }; +#else + +template +class MklConv2DCustomBackpropFilterOp : public OpKernel { + public: + explicit MklConv2DCustomBackpropFilterOp(OpKernelConstruction* context) + : OpKernel(context) { + string data_format; + OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); + OP_REQUIRES(context, FormatFromString(data_format, &data_format_), + errors::InvalidArgument("Invalid data format")); + + OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); + int stride_n = GetTensorDim(strides_, data_format_, 'N'); + int stride_c = GetTensorDim(strides_, data_format_, 'C'); + OP_REQUIRES( + context, (stride_n == 1 && stride_c == 1), + errors::InvalidArgument("Current implementation does not yet support " + "strides in the batch and depth dimensions.")); + OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + } + + void Compute(OpKernelContext* context) override { + try { + auto cpu_engine = engine(engine::cpu, 0); + + MklDnnData input(&cpu_engine); + MklDnnData outbackprop(&cpu_engine); + MklDnnData output(&cpu_engine); + + // Input tensors + const Tensor& input_tensor = MklGetInput(context, 0); + const Tensor& filter_tensor = MklGetInput(context, 1); + const Tensor& obp_tensor = MklGetInput(context, 2); // Outbackprop + + // Generate input shapes. 
+ TensorShape filter_shape; + OP_REQUIRES( + context, TensorShapeUtils::IsVector(filter_tensor.shape()), + errors::InvalidArgument( + "Conv2DBackpropFilter: filter_sizes input must be 1-dim, not ", + filter_tensor.dims())); + OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( + filter_tensor.vec(), &filter_shape)); + TensorShape input_shape = input_tensor.shape(); + TensorShape obp_shape = obp_tensor.shape(); + + // By default, all dims are in MKL order. Only dims in TF order + // are those with suffix _tf_order. + memory::dims obp_dims, fwd_input_dims, fwd_filter_dims; + memory::dims padding_l, padding_r, strides, fwd_output_dims; + memory::dims fwd_output_dims_tf_order; + + // Get forward convolution parameters. + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); + conv_utl.GetConvFwdSizesInMklOrder( + input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims, + &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, + &padding_r); + if (!context->status().ok()) return; + + // Create Convolution forward descriptor since Convolution backward + // API needs it. For that, we first need to create input, filter + // and output memory descriptors. + auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_); + auto fwd_src_md = + memory::desc(fwd_input_dims, MklDnnType(), mkl_data_format); + auto fwd_filter_md = + memory::desc(fwd_filter_dims, MklDnnType(), memory::format::hwio); + auto fwd_out_md = + memory::desc(fwd_output_dims, MklDnnType(), mkl_data_format); + auto fwd_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md, + fwd_out_md, strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); + auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); + + // Allocate output tensor and shape + // TODO(nhasabni): Update this when support for MKL layout is added. + // Shape of output of Conv2DBackpropFilter is same as 'filter' of Conv2D. 
+ TensorShape tf_output_shape(filter_shape); + MklShape mkl_output_mkl_shape; + mkl_output_mkl_shape.SetMklTensor(false); + Tensor* output_tensor = nullptr; + AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape, + mkl_output_mkl_shape); + + // Create memory for user data. + // Describe what the inputs and outputs of Convolution look like. Also + // specify buffers containing actual input and output data. + // Although input shape required is in MKL-DNN order, the layout is + // Tensorflow's layout (NHWC or NCHW depending on data format). + input.SetUsrMem(fwd_input_dims, mkl_data_format, &input_tensor); + // Outbackprop shape is NHWC or NCHW depending on data format. Since + // GetInputSizeInMklOrder function returns size in that order we just use + // that function directly. + conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims); + if (!context->status().ok()) return; + outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor); + // Although output shape required is in MKL-DNN order, + // layout is Tensorflow's filter layout (HWIO) + // Shape of output of Conv2DBackpropFilter is same as shape of filter. + memory::dims bwd_output_dims = fwd_filter_dims; + output.SetUsrMem(bwd_output_dims, memory::format::hwio, output_tensor); + + // Create memory descriptors for convolution data w/ no specified format. + input.SetOpMemDesc(fwd_input_dims, memory::format::any); + outbackprop.SetOpMemDesc(obp_dims, memory::format::any); + output.SetOpMemDesc(bwd_output_dims, memory::format::any); + + // Create convolution backward weights primitive. 
+ auto bwd_desc = convolution_backward_weights::desc( + convolution_direct, input.GetOpMemDesc(), output.GetOpMemDesc(), + outbackprop.GetOpMemDesc(), strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); + + auto bwd_pd = convolution_backward_weights::primitive_desc( + bwd_desc, cpu_engine, fwd_pd); + + PrepareAndExecutePrimitive(bwd_pd, &input, &outbackprop, &output); + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + ", in file " + + string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); + } + } + + private: + std::vector strides_; + Padding padding_; + TensorFormat data_format_; + + // Prepare and execute net - checks for input and output reorders. + void PrepareAndExecutePrimitive( + const convolution_backward_weights::primitive_desc& conv_pd, + MklDnnData* input, MklDnnData* obp, MklDnnData* output) { + // Create reorders between user layout and MKL layout if it is needed and + // add it to the net before convolution. + std::vector net; + input->CheckReorderToOpMem(conv_pd.src_primitive_desc(), &net); + obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net); + + // Memory for output of convolution. Since we may need reorder on the + // output side, we will prepare reorder primitive in case output + // reorder to user memory is required. + bool output_reorder_required = output->PrepareReorderToUserMemIfReq( + conv_pd.diff_weights_primitive_desc()); + + net.push_back(convolution_backward_weights( + conv_pd, input->GetOpMem(), obp->GetOpMem(), output->GetOpMem())); + + // Insert reorder primitive in the net for output reorder if reorder is + // required. 
+ if (output_reorder_required) { + output->InsertReorderToUserMem(&net); + } + + // Handle output reorder + stream(stream::kind::eager).submit(net).wait(); + } +}; +#endif + #define REGISTER_MKL_FILTER_KERNELS(T) \ REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropFilter") \ .Device(DEVICE_CPU) \ diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index 00884d0981..4b6bf92e42 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -23,6 +23,8 @@ limitations under the License. #define EIGEN_USE_THREADS #include #include +#include "mkl_dnn.h" +#include "mkl_dnn_types.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -30,6 +32,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" #include "tensorflow/core/kernels/conv_grad_ops.h" +#include "tensorflow/core/kernels/mkl_conv_ops.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -40,13 +43,24 @@ limitations under the License. 
#include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" #include "tensorflow/core/util/work_sharder.h" -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" + +#ifdef INTEL_MKL_DNN +#include "mkldnn.hpp" + +using mkldnn::prop_kind; +using mkldnn::stream; + +using mkldnn::convolution_backward_data; +using mkldnn::convolution_direct; +using mkldnn::convolution_forward; +#endif namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; +#ifndef INTEL_MKL_DNN + template class MklConv2DCustomBackpropInputOp : public OpKernel { public: @@ -345,6 +359,178 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { TensorFormat data_format; }; +#else + +template +class MklConv2DCustomBackpropInputOp : public OpKernel { + public: + ~MklConv2DCustomBackpropInputOp() {} + explicit MklConv2DCustomBackpropInputOp(OpKernelConstruction* context) + : OpKernel(context) { + string data_format_str; + OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format_str)); + OP_REQUIRES(context, FormatFromString(data_format_str, &data_format_), + errors::InvalidArgument("Invalid data format")); + OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); + int stride_n = GetTensorDim(strides_, data_format_, 'N'); + int stride_c = GetTensorDim(strides_, data_format_, 'C'); + OP_REQUIRES( + context, (stride_n == 1 && stride_c == 1), + errors::InvalidArgument("Current implementation does not yet support " + "strides in the batch and depth dimensions.")); + + OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + } + + void Compute(OpKernelContext* context) override { + try { + auto cpu_engine = engine(engine::cpu, 0); + + MklDnnData filter(&cpu_engine); + MklDnnData outbackprop(&cpu_engine); + MklDnnData output(&cpu_engine); + + // Input tensors + const Tensor& input_tensor = MklGetInput(context, 0); + const Tensor& filter_tensor = MklGetInput(context, 1); + const Tensor& obp_tensor = MklGetInput(context, 2); // Outbackprop + + 
// Generate input shape. + TensorShape input_shape; + OP_REQUIRES( + context, TensorShapeUtils::IsVector(input_tensor.shape()), + errors::InvalidArgument( + "Conv2DBackpropInput: input_sizes input must be 1-dim, not ", + input_tensor.dims())); + OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( + input_tensor.vec(), &input_shape)); + TensorShape filter_shape = filter_tensor.shape(); + TensorShape obp_shape = obp_tensor.shape(); + + // By default, all dims are in MKL order. Only dims in TF order + // are those with prefix tf_order. + memory::dims obp_dims, fwd_input_dims, fwd_filter_dims; + memory::dims padding_l, padding_r, strides, fwd_output_dims; + memory::dims fwd_output_dims_tf_order; + + // Get forward convolution parameters. + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); + conv_utl.GetConvFwdSizesInMklOrder( + input_shape, filter_shape, &fwd_input_dims, &fwd_filter_dims, + &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, + &padding_r); + if (!context->status().ok()) return; + + // Create Convolution forward descriptor since Convolution backward + // API needs it. For that, we first need to create input, filter + // and output memory descriptors. + auto mkl_data_format = TFDataFormatToMklDnnDataFormat(data_format_); + auto fwd_src_md = + memory::desc(fwd_input_dims, MklDnnType(), mkl_data_format); + auto fwd_filter_md = + memory::desc(fwd_filter_dims, MklDnnType(), memory::format::hwio); + auto fwd_out_md = + memory::desc(fwd_output_dims, MklDnnType(), mkl_data_format); + auto fwd_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, fwd_src_md, fwd_filter_md, + fwd_out_md, strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); + auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); + + // Allocate output tensor and shape + // TODO(nhasabni): Update this when support for MKL layout is added. 
+ // Shape of output of Conv2DBackpropInput is same as 'input' of Conv2D. + TensorShape tf_output_shape(input_shape); + MklShape mkl_output_mkl_shape; + mkl_output_mkl_shape.SetMklTensor(false); + Tensor* output_tensor = nullptr; + AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape, + mkl_output_mkl_shape); + + // Create memory for user data. + // Describe what the inputs and outputs of Convolution look like. Also + // specify buffers containing actual input and output data. + // Although input shape required is in MKL-DNN order, the layout is + // Tensorflow's layout (NHWC or NCHW depending on data format). + // Although filter shape (filter_dims) required is in MKL-DNN order, + // the layout is Tensorflow's layout (HWIO). + // Shape of Conv2DBackpropInput's filter is same as that of Conv2D filter. + filter.SetUsrMem(fwd_filter_dims, memory::format::hwio, &filter_tensor); + // Outbackprop shape is NHWC or NCHW depending on data format. Since + // GetInputSizeInMklOrder function returns size in that order we just use + // that function directly. + conv_utl.GetInputSizeInMklOrder(obp_shape, &obp_dims); + if (!context->status().ok()) return; + outbackprop.SetUsrMem(obp_dims, mkl_data_format, &obp_tensor); + // Although output shape required is in MKL-DNN order, + // layout is Tensorflow's layout (NHWC or NCHW depending on data format). + // Shape of output of Conv2DBackpropInput is same as shape of 'input' + // of Conv2D. + memory::dims bwd_output_dims = fwd_input_dims; + output.SetUsrMem(bwd_output_dims, mkl_data_format, output_tensor); + + // Create memory descriptors for convolution data w/ no specified format. + filter.SetOpMemDesc(fwd_filter_dims, memory::format::any); + outbackprop.SetOpMemDesc(obp_dims, memory::format::any); + output.SetOpMemDesc(bwd_output_dims, memory::format::any); + + // Create convolution backward data primitive. 
+ auto bwd_desc = convolution_backward_data::desc( + convolution_direct, output.GetOpMemDesc(), filter.GetOpMemDesc(), + outbackprop.GetOpMemDesc(), strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); + + auto bwd_pd = convolution_backward_data::primitive_desc( + bwd_desc, cpu_engine, fwd_pd); + + PrepareAndExecutePrimitive(bwd_pd, &filter, &outbackprop, &output); + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + ", in file " + + string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); + } + } + + private: + std::vector strides_; + Padding padding_; + TensorFormat data_format_; + + // Prepare and execute net - checks for input and output reorders. + void PrepareAndExecutePrimitive( + const convolution_backward_data::primitive_desc& conv_pd, + MklDnnData* filter, MklDnnData* obp, MklDnnData* output) { + // Create reorders between user layout and MKL layout if it is needed and + // add it to the net before convolution. + std::vector net; + filter->CheckReorderToOpMem(conv_pd.weights_primitive_desc(), &net); + obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net); + + // Memory for output of convolution. Since we may need reorder on the + // output side, we will prepare reorder primitive in case output + // reorder to user memory is required. + bool output_reorder_required = + output->PrepareReorderToUserMemIfReq(conv_pd.diff_src_primitive_desc()); + + net.push_back(convolution_backward_data( + conv_pd, obp->GetOpMem(), filter->GetOpMem(), output->GetOpMem())); + + // Insert reorder primitive in the net for output reorder if reorder is + // required. 
+ if (output_reorder_required) { + output->InsertReorderToUserMem(&net); + } + + // Handle output reorder + stream(stream::kind::eager).submit(net).wait(); + } +}; + +#endif // INTEL_MKL_DNN + #define REGISTER_MKL_CPU_KERNELS(T) \ REGISTER_KERNEL_BUILDER(Name("_MklConv2DBackpropInput") \ .Device(DEVICE_CPU) \ diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 7f1555d325..57661e8b10 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -18,7 +18,9 @@ limitations under the License. #include #include +#include #include + #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -26,6 +28,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" #include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/kernels/mkl_conv_ops.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -40,10 +43,23 @@ limitations under the License. #include "mkl_dnn.h" #include "mkl_dnn_types.h" +#ifdef INTEL_MKL_DNN +#include "mkldnn.hpp" + +using mkldnn::prop_kind; +using mkldnn::stream; + +using mkldnn::convolution_direct; +using mkldnn::convolution_forward; +#endif + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; +// For now, MKL-ML is default. So making MKL-DNN not a default choice. 
+#ifndef INTEL_MKL_DNN + template class MklConv2DOp : public OpKernel { public: @@ -461,6 +477,203 @@ class MklConv2DOp : public OpKernel { TensorFormat data_format_; }; +#else + +template +class MklConv2DOp : public OpKernel { + public: + ~MklConv2DOp() {} + + explicit MklConv2DOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); + string data_format; + OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); + OP_REQUIRES(context, FormatFromString(data_format, &data_format_), + errors::InvalidArgument("Invalid data format")); + OP_REQUIRES(context, strides_.size() == 4, + errors::InvalidArgument("Sliding window strides field must " + "specify 4 dimensions")); + + const int64 stride_n = GetTensorDim(strides_, data_format_, 'N'); + const int64 stride_c = GetTensorDim(strides_, data_format_, 'C'); + OP_REQUIRES( + context, stride_n == 1 && stride_c == 1, + errors::InvalidArgument("Current implementation does not yet support " + "strides in the batch and depth dimensions.")); + OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + } + + void Compute(OpKernelContext* context) override { + try { + auto cpu_engine = engine(engine::cpu, 0); + + // Input tensors + size_t src_idx = 0, filter_idx = 1; + const Tensor& src_tensor = MklGetInput(context, src_idx); + const Tensor& filter_tensor = MklGetInput(context, filter_idx); + + MklDnnData src(&cpu_engine); + MklDnnData filter(&cpu_engine); + MklDnnData output(&cpu_engine); + + memory::dims src_dims, filter_dims, padding_l, padding_r, strides; + memory::dims output_dims_tf_order, output_dims_mkl_order; + + // Get shapes of input tensors in MKL-DNN order + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); + conv_utl.GetConvFwdSizesInMklOrder( + src_tensor.shape(), filter_tensor.shape(), &src_dims, &filter_dims, + &strides, &output_dims_tf_order, &output_dims_mkl_order, &padding_l, + &padding_r); + if 
(!context->status().ok()) return; + + // Check for corner case - if there is nothing to compute, return. + TensorShape tf_output_shape( + {output_dims_tf_order[0], output_dims_tf_order[1], + output_dims_tf_order[2], output_dims_tf_order[3]}); + Tensor* output_tensor = nullptr; + MklShape mkl_output_mkl_shape; + mkl_output_mkl_shape.SetMklTensor(false); + AllocateOutputSetMklShape(context, 0, &output_tensor, tf_output_shape, + mkl_output_mkl_shape); + + // Forward filter in TF format from input at index 1 to output at index 1. + ForwardTfTensorInToOut(context, 1, 1); + + if (tf_output_shape.num_elements() == 0) { + // TODO(jbobba): Verify correctness here + // Need semantics for Null MKL tensor + return; + } + + // Corner case to handle 0 batch size. + if (output_dims_tf_order[0] == 0) { + // Nothing to do, allocate output tensor and return + // TODO(nhasabni): remove this code later once serialization + // in MKL-DNN is supported. + AllocateOutputSetMklShape(context, 0, &output_tensor, + src_tensor.shape(), mkl_output_mkl_shape); + return; + } else { + // Otherwise regular output tensor allocation + // Allocate output tensor. + } + CHECK_NOTNULL(output_tensor); + + // Create memory for user data. + // Describe how the inputs and outputs of Convolution look like. Also + // specify buffers containing actual input and output data. + // Although input shape (src_dims) required is in MKL-DNN order, + // the layout is Tensorflow's layout (NHWC or NCHW depending on data + // format). + src.SetUsrMem(src_dims, TFDataFormatToMklDnnDataFormat(data_format_), + const_cast( + static_cast(src_tensor.flat().data()))); + // Although filter shape (filter_dims) required is in MKL-DNN order, + // the layout is Tensorflow's layout (HWIO). 
+ filter.SetUsrMem(filter_dims, memory::format::hwio, + const_cast(static_cast( + filter_tensor.flat().data()))); + // Although output shape (output_dims) required is in MKL-DNN order, + // layout is Tensorflow's layout (NHWC or NCHW depending on data format). + output.SetUsrMem(output_dims_mkl_order, + TFDataFormatToMklDnnDataFormat(data_format_), + output_tensor->flat().data()); + + // Create memory descriptors for convolution data w/ no specified format. + src.SetOpMemDesc(src_dims, memory::format::any); + filter.SetOpMemDesc(filter_dims, memory::format::any); + output.SetOpMemDesc(output_dims_mkl_order, memory::format::any); + + // If bias is enabled, then do the same steps as above for bias. + if (biasEnabled) { + MklDnnData bias(&cpu_engine); + memory::dims bias_size; + conv_utl.GetBiasSizeInMklOrder(2 /* bias idx */, &bias_size); + const Tensor& bias_tensor = MklGetInput(context, 2); + bias.SetUsrMem(bias_size, memory::format::x, + const_cast(static_cast( + bias_tensor.flat().data()))); + bias.SetOpMemDesc(bias_size, memory::format::any); + + // Create convolution primitive with Bias. + auto conv_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(), + strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = + convolution_forward::primitive_desc(conv_desc, cpu_engine); + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output); + } else { + // Create convolution primitive without Bias. 
+ auto conv_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l, + padding_r, TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = + convolution_forward::primitive_desc(conv_desc, cpu_engine); + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output); + } + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + std::string(e.message) + ", in file " + + std::string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); + } + } + + private: + std::vector strides_; + Padding padding_; + TensorFormat data_format_; + + // Prepare and execute net - checks for input and output reorders. + void PrepareAndExecuteNet( + const convolution_forward::primitive_desc& conv_prim_desc, + MklDnnData* src, MklDnnData* filter, MklDnnData* bias, + MklDnnData* output) { + // Create reorders between user layout and MKL layout if it is needed and + // add it to the net before convolution. + std::vector net; + src->CheckReorderToOpMem(conv_prim_desc.src_primitive_desc(), &net); + filter->CheckReorderToOpMem(conv_prim_desc.weights_primitive_desc(), &net); + + // Memory for output of convolution. Since we may need reorder on the + // output side, we will prepare reorder primitive in case output + // reorder to user memory is required. + bool output_reorder_required = output->PrepareReorderToUserMemIfReq( + conv_prim_desc.dst_primitive_desc()); + + // Create convolution primitive and add it to net. 
+ if (bias) { + CHECK_EQ(biasEnabled, true); + net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(), + filter->GetOpMem(), bias->GetOpMem(), + output->GetOpMem())); + } else { + CHECK_EQ(biasEnabled, false); + net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(), + filter->GetOpMem(), + output->GetOpMem())); + } + + // Insert reorder primitive in the net for output reorder if reorder is + // required. + if (output_reorder_required) { + output->InsertReorderToUserMem(&net); + } + + // Handle output reorder + stream(stream::kind::eager).submit(net).wait(); + } +}; + +#endif + #define REGISTER_MKL_CPU(T) \ REGISTER_KERNEL_BUILDER(Name("_MklConv2D") \ .Device(DEVICE_CPU) \ diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h new file mode 100644 index 0000000000..e29af19ca9 --- /dev/null +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -0,0 +1,308 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ +#define TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ + +#include +#include + +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_slice.h" +#include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/kernels/conv_grad_ops.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/strings/numbers.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/util/padding.h" +#include "tensorflow/core/util/tensor_format.h" + +#include "tensorflow/core/util/mkl_util.h" + +#ifdef INTEL_MKL_DNN +#include "mkldnn.hpp" +#endif + +namespace tensorflow { + +#ifdef INTEL_MKL_DNN + +class MklDnnConvUtil { + protected: + OpKernelContext *context_; // We don't own this. + std::vector strides_; + Padding padding_; + TensorFormat data_format_; + + public: + MklDnnConvUtil(OpKernelContext *context, const std::vector &strides, + Padding pad, TensorFormat fm) + : context_(context), strides_(strides), padding_(pad), data_format_(fm) {} + + virtual ~MklDnnConvUtil() { context_ = nullptr; } + + // Calculate Convolution strides + virtual inline void GetStridesInMklOrder(memory::dims *strides) { + // For now we take the stride from the second and third dimensions only + // (we do not support striding on the batch or depth dimension). 
+ CHECK_NOTNULL(strides); + int stride_rows = GetTensorDim(strides_, data_format_, 'H'); + int stride_cols = GetTensorDim(strides_, data_format_, 'W'); + *strides = {stride_rows, stride_cols}; + } + + // Calculate Convolution input size in MKL-DNN order. MKL-DNN + // requires input in NCHW format. Function does not return anything. + // But errors arising from sanity checks are returned in context's + // status. + virtual inline void GetInputSizeInMklOrder(const TensorShape &input_shape, + memory::dims *input_dims) { +#define CHECK_BOUNDS(val, err_msg) \ + do { \ + OP_REQUIRES(context_, \ + FastBoundsCheck(val, std::numeric_limits::max()), \ + errors::InvalidArgument(err_msg)); \ + } while (0) + + CHECK_NOTNULL(input_dims); + + // Input channel + int64 input_depth_raw = GetTensorDim(input_shape, data_format_, 'C'); + int input_depth = static_cast(input_depth_raw); + + // Input rows/height + int64 input_rows_raw = GetTensorDim(input_shape, data_format_, 'H'); + CHECK_BOUNDS(input_rows_raw, "Input rows too large"); + int input_rows = static_cast(input_rows_raw); + + // Input columns/width + int64 input_cols_raw = GetTensorDim(input_shape, data_format_, 'W'); + CHECK_BOUNDS(input_cols_raw, "Input cols too large"); + int input_cols = static_cast(input_cols_raw); + + // Input batch + int64 input_batch_raw = GetTensorDim(input_shape, data_format_, 'N'); + CHECK_BOUNDS(input_batch_raw, "Input batch too large"); + int input_batch = static_cast(input_batch_raw); + +#undef CHECK_BOUNDS + + // MKL-DNN always requires input in NCHW format. + *input_dims = {input_batch, input_depth, input_rows, input_cols}; + } + + // Calculate Convolution filter size in MKL-DNN order. MKL-DNN + // requires filter in OIHW format. Function does not return anything. + // But errors arising from sanity checks are returned in context's + // status. + // + // Calculate Convolution filter size in MKL-DNN order. MKL-DNN + // requires filter in OIHW format. Function does not return anything. 
+ // But errors arising from sanity checks are returned in context's + // status. This function differs from GetConvFilterSizeInMklOrder in + // parameter for input - it accepts src_shape since Convolution Backward + // Input gets shape of input tensor rather than actual tensor (Convolution + // forward gets actual tensor as input). + // + // TODO(nhasabni): Add similar function for input and filter in MklShape. + virtual inline void GetFilterSizeInMklOrder(const TensorShape &input_shape, + const TensorShape &filter_shape, + memory::dims *filter_dims) { + CHECK_NOTNULL(filter_dims); + + OP_REQUIRES(context_, filter_shape.dims() == 4, + errors::InvalidArgument("filter must be 4-dimensional: ", + filter_shape.DebugString())); + + for (int i = 0; i < 3; i++) { + OP_REQUIRES(context_, + FastBoundsCheck(filter_shape.dim_size(i), + std::numeric_limits::max()), + errors::InvalidArgument("filter too large")); + } + + int input_depth = GetTensorDim(input_shape, data_format_, 'C'); + + OP_REQUIRES(context_, input_depth == filter_shape.dim_size(2), + errors::InvalidArgument( + "input and filter must have the same depth: ", input_depth, + " vs ", filter_shape.dim_size(2))); + + // TF filter is always in (rows, cols, in_depth, out_depth) order. + int filter_rows = static_cast(filter_shape.dim_size(0)); + int filter_cols = static_cast(filter_shape.dim_size(1)); + int in_depth = static_cast(filter_shape.dim_size(2)); + int out_depth = static_cast(filter_shape.dim_size(3)); + + // MKL-DNN always needs filter in OIHW format. + // OIHW = (out_depth, in_depth, rows, cols) + *filter_dims = {out_depth, in_depth, filter_rows, filter_cols}; + } + + // Calculate Convolution filter size in MKL-DNN order. MKL-DNN + // requires filter in OIHW format. Function does not return anything. + // But errors arising from sanity checks are returned in context's + // status. 
+ virtual inline void GetFilterSizeInMklOrder(size_t src_index, + size_t filter_index, + memory::dims *filter_dims) { + CHECK_NOTNULL(filter_dims); + const Tensor &input = MklGetInput(context_, src_index); + const Tensor &filter = MklGetInput(context_, filter_index); + GetFilterSizeInMklOrder(input.shape(), filter.shape(), filter_dims); + } + + // Calculate Bias size for 2D Convolution. Function does not return + // anything, but sets error in context status. + virtual inline void GetBiasSizeInMklOrder(size_t bias_index, + memory::dims *bias_dims) { + const Tensor &bias = MklGetInput(context_, bias_index); + OP_REQUIRES(context_, bias.dims() == 1, + errors::InvalidArgument("bias must be 1-dimensional: ", + bias.shape().DebugString())); + + *bias_dims = {static_cast(bias.dim_size(0))}; + } + + // Function to calculate output and padding size for 2D convolution. + // + // Calculate output shape of Convolution in MKL-DNN and TensorFlow order. + // MKL-DNN uses NCHW for output order. But TensorFlow output will be in + // NHWC or NCHW format depending on data format. Function also calculates + // left, right, top and bottom pads. Function does not return any status - + // status is returned via context status. + // + // TODO(nhasabni): Add similar function for input and filter in MklShape. + virtual inline void GetOutputAndPadSizeInMklOrder( + const TensorShape &input_shape, const TensorShape &filter_shape, + const memory::dims &strides, memory::dims *output_dims_tf_order, + memory::dims *output_dims_mkl_order, memory::dims *pad_l, + memory::dims *pad_r) { + CHECK_NOTNULL(output_dims_tf_order); + CHECK_NOTNULL(output_dims_mkl_order); + CHECK_NOTNULL(pad_l); + CHECK_NOTNULL(pad_r); + + int input_rows = GetTensorDim(input_shape, data_format_, 'H'); + int input_cols = GetTensorDim(input_shape, data_format_, 'W'); + + // The first dimension for filter is rows/height. + int filter_rows = filter_shape.dim_size(0); + // The second dimension for filter is cols/width. 
+ int filter_cols = filter_shape.dim_size(1); + + // Stride is vector of 2 elements: {s_r, s_c} + int stride_rows = strides[0]; + int stride_cols = strides[1]; + + // Output batch is same as input batch. + int out_batch = GetTensorDim(input_shape, data_format_, 'N'); + // Output depth is same as last dimension for filter. + int out_depth = filter_shape.dim_size(3); + + int64 out_rows = 0, out_cols = 0; + int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right; + + OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( + input_rows, filter_rows, stride_rows, padding_, + &out_rows, &pad_top, &pad_bottom)); + OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( + input_cols, filter_cols, stride_cols, padding_, + &out_cols, &pad_left, &pad_right)); + + // Tensorflow output is in data_format order. (NHWC or NCHW) + TensorShape out_shape = + ShapeFromFormat(data_format_, out_batch, out_rows, out_cols, out_depth); + *output_dims_tf_order = TFShapeToMklDnnDims(out_shape); + + // MKL-DNN always needs output in NCHW format. + *output_dims_mkl_order = {out_batch, out_depth, static_cast(out_rows), + static_cast(out_cols)}; + + // Now handle padding. MKL-DNN uses asymetric padding. + *pad_l = {static_cast(pad_top), static_cast(pad_left)}; + *pad_r = {static_cast(pad_bottom), static_cast(pad_right)}; + } + + // Calculate output and pad size of forward Convolution operator. + // See comment on GetConvOutputAndPadSizeInMklOrder for parameters. + // + // Function does not return anything, but sets error in context status. 
+ inline void GetOutputAndPadSizeInMklOrder( + size_t src_index, size_t filter_index, const memory::dims &strides, + memory::dims *output_dims_tf_order, memory::dims *output_dims_mkl_order, + memory::dims *pad_l, memory::dims *pad_r) { + CHECK_NOTNULL(output_dims_tf_order); + CHECK_NOTNULL(output_dims_mkl_order); + CHECK_NOTNULL(pad_l); + CHECK_NOTNULL(pad_r); + + const Tensor &input = MklGetInput(context_, src_index); + const Tensor &filter = MklGetInput(context_, filter_index); + + OP_REQUIRES(context_, input.dims() == 4, + errors::InvalidArgument("input must be 4-dimensional", + input.shape().DebugString())); + + GetOutputAndPadSizeInMklOrder(input.shape(), filter.shape(), strides, + output_dims_tf_order, output_dims_mkl_order, + pad_l, pad_r); + } + + // Wrapper function to calculate input, filter, and output sizes of + // 2D Convolution in MKL order (NCHW for input and output; OIHW for filter.) + // Function also calculates output shape in Tensorflow order. Additionally, it + // also calculates strides and paddings for 2D Convolution. + // + // Function does not return anything, but sets error in context status. 
+ inline void GetConvFwdSizesInMklOrder( + const TensorShape &input_shape, const TensorShape &filter_shape, + memory::dims *input_dims, memory::dims *filter_dims, + memory::dims *strides, memory::dims *output_dims_tf_order, + memory::dims *output_dims_mkl_order, memory::dims *pad_l, + memory::dims *pad_r) { + CHECK_NOTNULL(input_dims); + CHECK_NOTNULL(filter_dims); + CHECK_NOTNULL(strides); + CHECK_NOTNULL(output_dims_tf_order); + CHECK_NOTNULL(output_dims_mkl_order); + CHECK_NOTNULL(pad_l); + CHECK_NOTNULL(pad_r); + + GetInputSizeInMklOrder(input_shape, input_dims); + if (!context_->status().ok()) return; + GetFilterSizeInMklOrder(input_shape, filter_shape, filter_dims); + if (!context_->status().ok()) return; + GetStridesInMklOrder(strides); + GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides, + output_dims_tf_order, output_dims_mkl_order, + pad_l, pad_r); + if (!context_->status().ok()) return; + } +}; + +#endif // INTEL_MKL_DNN + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ diff --git a/tensorflow/core/kernels/mkl_cwise_ops_common.cc b/tensorflow/core/kernels/mkl_cwise_ops_common.cc index 7fc633c254..c065724e0d 100644 --- a/tensorflow/core/kernels/mkl_cwise_ops_common.cc +++ b/tensorflow/core/kernels/mkl_cwise_ops_common.cc @@ -48,7 +48,7 @@ class MklBinaryOp : public BinaryOp { auto out = context->mutable_output(0); VLOG(1) << "Shapes (output): " << out->shape().DebugString(); - // Pass input shape through to ouput shape + // Pass input shape through to output shape ForwardMklMetaDataInToOut(context, 0, 0); out = context->mutable_output(0); diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc index 3c85737702..302a6967e3 100644 --- a/tensorflow/core/lib/strings/numbers.cc +++ b/tensorflow/core/lib/strings/numbers.cc @@ -340,7 +340,7 @@ char* FloatToBuffer(float value, char* buffer) { float parsed_value; if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { 
snprintf_result = - snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG + 2, value); + snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG + 3, value); // Should never overflow; see above. DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize); diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index df189af1b8..c0e84c8bb0 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -383,7 +383,8 @@ input_dataset: A handle to an input dataset. Must have a single component. batch_size: A scalar representing the number of elements to accumulate in a batch. row_shape: A vector representing the dense shape of each row in the produced - SparseTensor. + SparseTensor. The shape may be partially specified, using `-1` to indicate + that a particular dimension should use the maximum size of all batch elements. )doc"); REGISTER_OP("RangeDataset") diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 3dc16ac457..b34dc1a008 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -29,22 +29,6 @@ using shape_inference::ShapeHandle; namespace { -// A shape function that uses the tensor value at as a shape for -// output 0. If the tensor value is not available, it uses a shape with -// unknown dims. 
-Status InputTensorShapeOrUnknown(InferenceContext* c, int input_idx, - int ndims) { - ShapeHandle out; - const Tensor* input = c->input_tensor(input_idx); - if (input == nullptr) { - out = c->UnknownShapeOfRank(ndims); - } else { - TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(input_idx, &out)); - } - c->set_output(0, out); - return Status::OK(); -} - Status FractionalPoolShapeFn(InferenceContext* c) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input)); @@ -119,11 +103,11 @@ REGISTER_OP("AvgPoolGrad") .Attr(GetConvnetDataFormatAttrString()) .Attr("T: {half, float, double}") .SetShapeFn([](InferenceContext* c) { - // NOTE(mrry): We could in principle work out the shape from the - // gradients and the attrs, but if we do not know orig_input_shape - // statically, then we are unlikely to know the shape of the - // gradients either. - return InputTensorShapeOrUnknown(c, 0 /* input_idx */, 4 /* ndims */); + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); + TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s)); + c->set_output(0, s); + return Status::OK(); }) .Doc(R"doc( Computes gradients of the average pooling function. @@ -583,11 +567,11 @@ REGISTER_OP("Conv2DBackpropInput") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) .SetShapeFn([](InferenceContext* c) { - // NOTE(mrry): We could in principle work out the shape from the - // gradients and the attrs, but if we do not know orig_input_shape - // statically, then we are unlikely to know the shape of the - // gradients either. - return InputTensorShapeOrUnknown(c, 0 /* input_idx */, 4 /* ndims */); + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); + TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s)); + c->set_output(0, s); + return Status::OK(); }) .Doc(R"doc( Computes the gradients of convolution with respect to the input. 
@@ -625,11 +609,11 @@ REGISTER_OP("Conv2DBackpropFilter") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) .SetShapeFn([](InferenceContext* c) { - // NOTE(mrry): We could in principle work out the shape from the - // gradients and the attrs, but if we do not know orig_input_shape - // statically, then we are unlikely to know the shape of the - // gradients either. - return InputTensorShapeOrUnknown(c, 1 /* input_idx */, 4 /* ndims */); + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); + TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s)); + c->set_output(0, s); + return Status::OK(); }) .Doc(R"doc( Computes the gradients of convolution with respect to the filter. @@ -882,11 +866,11 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropInput") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) .SetShapeFn([](InferenceContext* c) { - // NOTE(mrry): We could in principle work out the shape from the - // gradients and the attrs, but if we do not know orig_input_shape - // statically, then we are unlikely to know the shape of the - // gradients either. - return InputTensorShapeOrUnknown(c, 0 /* input_idx */, 4 /* ndims */); + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); + TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s)); + c->set_output(0, s); + return Status::OK(); }) .Doc(R"doc( Computes the gradients of depthwise convolution with respect to the input. @@ -924,11 +908,11 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropFilter") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) .SetShapeFn([](InferenceContext* c) { - // NOTE(mrry): We could in principle work out the shape from the - // gradients and the attrs, but if we do not know orig_input_shape - // statically, then we are unlikely to know the shape of the - // gradients either. 
- return InputTensorShapeOrUnknown(c, 1 /* input_idx */, 4 /* ndims */); + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); + TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s)); + c->set_output(0, s); + return Status::OK(); }) .Doc(R"doc( Computes the gradients of depthwise convolution with respect to the filter. @@ -2870,7 +2854,11 @@ REGISTER_OP("_MklConv2DBackpropFilter") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) .SetShapeFn([](InferenceContext* c) { - return InputTensorShapeOrUnknown(c, 1 /* input_idx */, 4 /* ndims */); + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); + TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s)); + c->set_output(0, s); + return Status::OK(); }) .Doc(R"doc( MKL version of Conv2DBackpropFilter. Uses MKL DNN APIs to compute the @@ -2911,7 +2899,11 @@ REGISTER_OP("_MklConv2DBackpropInput") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) .SetShapeFn([](InferenceContext* c) { - return InputTensorShapeOrUnknown(c, 0 /* input_idx */, 4 /* ndims */); + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); + TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s)); + c->set_output(0, s); + return Status::OK(); }) .Doc(R"doc( MKL version of Convolution2D backward input. Uses MKL DNN APIs to compute the @@ -3034,7 +3026,11 @@ REGISTER_OP("_MklAvgPoolGrad") .Attr(GetConvnetDataFormatAttrString()) .Attr("T: {float, half, double}") .SetShapeFn([](InferenceContext* c) { - return InputTensorShapeOrUnknown(c, 0 /* input_idx */, 4 /* ndims */); + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); + TF_RETURN_IF_ERROR(c->WithRank(s, 4, &s)); + c->set_output(0, s); + return Status::OK(); }) .Doc(R"doc( MKL version of AvgPoolGrad operator. 
Uses MKL DNN APIs to compute gradients diff --git a/tensorflow/core/ops/nn_ops_test.cc b/tensorflow/core/ops/nn_ops_test.cc index 51e4f8bffe..4628b725f8 100644 --- a/tensorflow/core/ops/nn_ops_test.cc +++ b/tensorflow/core/ops/nn_ops_test.cc @@ -81,55 +81,6 @@ TEST(NNOpsTest, TopKV2_ShapeFn) { op, "[1,2,3,4];[]"); } -TEST(NNOpsTest, InputTensorShapeOrUnknown2D_ShapeFn) { - typedef std::pair NameAndInputIndex; - for (const auto& p : - {NameAndInputIndex("AvgPoolGrad", 0), - NameAndInputIndex("Conv2DBackpropInput", 0), - NameAndInputIndex("Conv2DBackpropFilter", 1), - NameAndInputIndex("DepthwiseConv2dNativeBackpropInput", 0), - NameAndInputIndex("DepthwiseConv2dNativeBackpropFilter", 1)}) { - ShapeInferenceTestOp op(p.first); - op.input_tensors.resize(2); - - // Conv and Depthwise conv have three inputs. - string extra_shapes = (op.name == "AvgPoolGrad" ? "" : ";?"); - - // When the input tensor is not known, the output is 4 unknown dims. - INFER_OK(op, "?;?" + extra_shapes, "[?,?,?,?]"); - INFER_OK(op, "[4];?" + extra_shapes, "[?,?,?,?]"); - - // When input tensor is known, its values determine output shape. - std::vector shape{1, 2, 3, 4}; - Tensor shape_t = test::AsTensor(shape); - op.input_tensors[p.second] = &shape_t; - INFER_OK(op, "[4];?" + extra_shapes, "[1,2,3,4]"); - } -} - -TEST(NNOpsTest, InputTensorShapeOrUnknown3D_ShapeFn) { - typedef std::pair NameAndInputIndex; - for (const auto& p : {NameAndInputIndex("AvgPool3DGrad", 0), - NameAndInputIndex("Conv3DBackpropInputV2", 0), - NameAndInputIndex("Conv3DBackpropFilterV2", 1)}) { - ShapeInferenceTestOp op(p.first); - op.input_tensors.resize(2); - - // Conv3D has an extra shape. - string extra_shapes = (op.name == "AvgPool3DGrad" ? "" : ";?"); - - // When the input tensor is not known, the output is 4 unknown dims. - INFER_OK(op, "?;?" + extra_shapes, "[?,?,?,?,?]"); - INFER_OK(op, "[5];?" + extra_shapes, "[?,?,?,?,?]"); - - // When input tensor is known, its values determine output shape. 
- std::vector shape{1, 2, 3, 4, 5}; - Tensor shape_t = test::AsTensor(shape); - op.input_tensors[p.second] = &shape_t; - INFER_OK(op, "[5];?" + extra_shapes, "[1,2,3,4,5]"); - } -} - TEST(NNOpsTest, BatchNormWithGlobalNormalization_ShapeFn) { ShapeInferenceTestOp op("BatchNormWithGlobalNormalization"); diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc index f23ff083af..b44ea2e080 100644 --- a/tensorflow/core/ops/parsing_ops.cc +++ b/tensorflow/core/ops/parsing_ops.cc @@ -332,6 +332,7 @@ REGISTER_OP("DecodeCSV") .Attr("OUT_TYPE: list({float,int32,int64,string})") .Attr("field_delim: string = ','") .Attr("use_quote_delim: bool = true") + .Attr("na_value: string = ''") .SetShapeFn([](InferenceContext* c) { // Validate the record_defaults inputs. for (int i = 1; i < c->num_inputs(); ++i) { @@ -362,6 +363,7 @@ field_delim: char delimiter to separate fields in a record. use_quote_delim: If false, treats double quotation marks as regular characters inside of the string fields (ignoring RFC 4180, Section 2, Bullet 5). +na_value: Additional string to recognize as NA/NaN. output: Each tensor will have the same shape as records. )doc"); diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index f4bec9524a..1bfa4f83a3 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -26,13 +26,19 @@ limitations under the License. 
#include "mkl_trans.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/util/padding.h" +#include "tensorflow/core/util/tensor_format.h" + +#ifdef INTEL_MKL_DNN +#include "mkldnn.hpp" +#endif // The file contains a number of utility classes and functions used by MKL // enabled kernels @@ -219,19 +225,18 @@ class MklShape { // Location from start of buffer where isMklTensor_ is serialized #define DIMS_OFFSET \ (IS_MKL_TENSOR_OFFSET + sizeof(size_t)) // Location of dimension_ -#define SIZES_OFFSET(dims) \ - (DIMS_OFFSET + \ - sizeof(size_t)) // Location of sizes. Note dim is not used here, left here - // to make macros consistent. +// Location of sizes. Note dim is not used here, left here +// to make macros consistent. +#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t)) #define STRIDES_OFFSET(dims) \ (SIZES_OFFSET(dims) + dims * sizeof(size_t)) // Location of strides #define MKL_LAYOUT_OFFSET(dims) \ (STRIDES_OFFSET(dims) + dims * sizeof(size_t)) // Location of mklLayout_ #define TF_LAYOUT_OFFSET(dims) \ (MKL_LAYOUT_OFFSET(dims) + SIZE_OF_MKL_DNN_BUF) // Location of tfLayout_ +// Location of tf_to_mkl_dim_map_ #define TF_TO_MKL_DIM_MAP_OFFSET(dims) \ - (TF_LAYOUT_OFFSET(dims) + \ - SIZE_OF_MKL_DNN_BUF) // Location of tf_to_mkl_dim_map_ + (TF_LAYOUT_OFFSET(dims) + SIZE_OF_MKL_DNN_BUF) // TODO(agramesh1) make sure to create a const to share with rewrite pass // for min size of MKL metadata tensor. 
@@ -342,58 +347,6 @@ inline Tensor ConvertMklToTF(OpKernelContext* context, const Tensor& mkl_tensor, return output_tensor; } -// Since our ops are going to produce and also consume N addition tensors -// (Mkl) for N Tensorflow tensors, we can have following different -// orderings among these 2N tensors. -// -// E.g., for Tensorflow tensors A, B, and C, our ops will produce and -// consume A_m, B_m, and C_m additionally. -// -// INTERLEAVED: in this case 2N tensors are interleaved. So for above -// example, the ordering looks like: A, A_m, B, B_m, C, C_m. -// -// CONTIGUOUS: in thi case N Tensorflow tensors are contiguous followed -// by N Mkl tensors. So for above example, the ordering looks -// like: A, B, C, A_m, B_m, C_m -// -// Following APIs map index of original Tensorflow tensors to their appropriate -// position based on selected ordering. For contiguous ordering, we need to know -// the total number of tensors (parameter total). -// -typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering; -// NOTE: Currently, we use contiguous ordering. If you change this, then you -// would need to change Mkl op definitions in nn_ops.cc. -static MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS; - -// Get index of MetaData tensor from index 'n' of Data tensor. -inline int DataIndexToMetaDataIndex(int n, int total_tensors) { - if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { - // For interleaved ordering, Mkl tensor follows immediately after - // Tensorflow tensor. - return n + 1; - } else { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away. 
- return n + total_tensors / 2; - } -} - -int inline GetTensorDataIndex(int n, int total_tensors) { - if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { - return 2 * n; // index corresponding to nth input/output tensor - } else { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - return n; - } -} - -int inline GetTensorMetaDataIndex(int n, int total_tensors) { - // Get index for TensorData first and then use mapping function - // to get TensorMetaData index from TensorData index. - int tidx = GetTensorDataIndex(n, total_tensors); - return DataIndexToMetaDataIndex(tidx, total_tensors); -} - // Get the MKL shape from the second string tensor inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { mklshape->DeSerializeMklShape( @@ -480,6 +433,13 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, *buf_out = static_cast(tensor_out->flat().data()); } +template +inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, + TensorShape tf_shape) { + OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), + tf_shape, tensor_out)); +} + inline void GetStridesFromSizes(TensorFormat data_format, size_t* strides, const size_t* sizes) { // MKL requires strides in NCHW @@ -743,56 +703,299 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) { } } -namespace mkl_op_registry { -static const char* kMklOpLabel = "MklOp"; -static const char* kMklOpLabelPattern = "label='MklOp'"; +// ------------------------------------------------------------------- + +#ifdef INTEL_MKL_DNN + +using mkldnn::engine; +using mkldnn::memory; +using mkldnn::padding_kind; +using mkldnn::primitive; +using mkldnn::reorder; + +/// Return MKL-DNN data type (memory::data_type) for input type T +/// +/// @input None +/// @return memory::data_type corresponding to type T +template +static memory::data_type MklDnnType(); + +/// Instantiation for float type. 
Add similar instantiations for other +/// type if needed. +template <> +memory::data_type MklDnnType() { + return memory::data_type::f32; +} + +/// Map TensorFlow's data format into MKL-DNN data format +/// +/// @input: TensorFlow data format +/// @return: memory::format corresponding to TensorFlow data format; +/// Fails with an error if invalid data format. +inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { + if (format == FORMAT_NHWC) + return memory::format::nhwc; + else if (format == FORMAT_NCHW) + return memory::format::nchw; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); + // Return to get rid of compiler warning + return memory::format::format_undef; +} -// Get the name of Mkl op from original TensorFlow op -// We prefix 'Mkl' to the original op to get Mkl op. -inline string GetMklOpName(const string& name) { - // Prefix that we add to Tensorflow op name to construct Mkl op name. - const char* const kMklOpPrefix = "_Mkl"; - return string(kMklOpPrefix) + name; +/// Map TensorShape object into memory::dims required by MKL-DNN +/// +/// This function will simply map input TensorShape into MKL-DNN dims +/// naively. So it will preserve the order of dimensions. E.g., if +/// input tensor is in NHWC format, then dims will be in NHWC format +/// also. +/// +/// @input TensorShape object in shape +/// @return memory::dims corresponding to TensorShape +inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { + memory::dims dims(shape.dims()); + for (unsigned int d = 0; d < shape.dims(); ++d) { + dims[d] = shape.dim_size(d); + } + return dims; } -// Check whether opname with type T is registered as MKL-compliant. 
-// -// @input: name of the op -// @input: T datatype to be used for checking op -// @return: true if opname is registered as Mkl op; false otherwise -static inline bool IsMklOp(const std::string& op_name, DataType T) { - string kernel = KernelsRegisteredForOp(op_name); - bool result = - kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT); - if (result) { - VLOG(1) << "mkl_op_registry::" << op_name << " is " << kMklOpLabel; - } - return result; +/// Map TensorShape object into memory::dims in NCHW format required by MKL-DNN +/// +/// This function is a specific one than above function. It will map input +/// TensorShape into MKL-DNN dims in NCHW format. So it may not preserve the +/// order of dimensions. E.g., if input tensor is in NHWC format, then dims +/// will be in NCHW format, and not in NHWC format. +/// +/// @input TensorShape object in shape +/// @return memory::dims in MKL-DNN required NCHW format +inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, + TensorFormat format) { + // Check validity of format. + CHECK_NE(TFDataFormatToMklDnnDataFormat(format), + memory::format::format_undef); + + int n = shape.dim_size(GetTensorDimIndex(format, 'N')); + int c = shape.dim_size(GetTensorDimIndex(format, 'C')); + int h = shape.dim_size(GetTensorDimIndex(format, 'H')); + int w = shape.dim_size(GetTensorDimIndex(format, 'W')); + + // MKL-DNN requires dimensions in NCHW format. + return memory::dims({n, c, h, w}); } -// Check whether opname with type T is registered as MKL-compliant and -// is element-wise. -// -// @input: name of the op -// @input: T datatype to be used for checking op -// @return: true if opname is registered as element-wise Mkl op; false otherwise -static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) { - if (!IsMklOp(op_name, T)) { +inline padding_kind TFPaddingToMklDnnPadding(Padding pad) { + // MKL-DNN only supports zero padding. 
+ return padding_kind::zero; +} + +/* + * Class to represent all the resources corresponding to a tensor in TensorFlow + * that are required to execute an operation (such as Convolution). + */ +template +class MklDnnData { + private: + /// MKL-DNN memory primitive for input user memory + memory* user_memory_; + + /// MKL-DNN memory primitive in case input or output reorder is needed. + memory* reorder_memory_; + + /// Operations memory descriptor + memory::desc* op_md_; + + /// CPU engine on which operation will be executed + const engine* cpu_engine_; + + public: + explicit MklDnnData(const engine* e) + : user_memory_(nullptr), + reorder_memory_(nullptr), + op_md_(nullptr), + cpu_engine_(e) {} + + ~MklDnnData() { + cpu_engine_ = nullptr; // We don't own this. + delete (user_memory_); + delete (reorder_memory_); + delete (op_md_); + } + + void* GetTensorBuffer(const Tensor* tensor) { + CHECK_NOTNULL(tensor); + return const_cast( + static_cast(tensor->flat().data())); + } + + /// Set user memory primitive using specified dimensions, memory format and + /// data_buffer. Function automatically uses element data type by using + /// input type T used for creating call object. + /// + /// In a nutshell, function allows user to describe the input tensor to + /// an operation. E.g., filter of Conv2D is of shape {1, 2, 3, 4}, and + /// memory format HWIO, and the buffer that contains actual values is + /// pointed by data_buffer. + void SetUsrMem(memory::dims dim, memory::format fm, void* data_buffer) { + CHECK_NOTNULL(data_buffer); + CHECK_NOTNULL(cpu_engine_); + // TODO(nhasabni): can we remove dynamic memory allocation? 
+ user_memory_ = + new memory(memory::primitive_desc( + memory::desc(dim, MklDnnType(), fm), *cpu_engine_), + data_buffer); + } + + void SetUsrMem(memory::dims dim, memory::format fm, const Tensor* tensor) { + CHECK_NOTNULL(tensor); + SetUsrMem(dim, fm, GetTensorBuffer(tensor)); + } + + /// A version of function to set user memory primitive that accepts memory + /// descriptor directly, instead of accepting dimensions and format. This + /// function is more generic that the one above, but the function above is + /// sufficient in most cases. + void SetUsrMem(memory::desc md, void* data_buffer) { + CHECK_NOTNULL(data_buffer); + CHECK_NOTNULL(cpu_engine_); + // TODO(nhasabni): can we remove dynamic memory allocation? + user_memory_ = + new memory(memory::primitive_desc(md, *cpu_engine_), data_buffer); + } + + /// A version of SetUsrMem with memory descriptor and tensor + void SetUsrMem(memory::desc md, const Tensor* tensor) { + CHECK_NOTNULL(tensor); + SetUsrMem(md, GetTensorBuffer(tensor)); + } + + /// A version of function to set user memory primitive that accepts primitive + /// descriptor directly, instead of accepting dimensions and format. This + /// function is more generic that the one above, but the function above is + /// sufficient in most cases. + void SetUsrMem(memory::primitive_desc pd, void* data_buffer) { + CHECK_NOTNULL(data_buffer); + CHECK_NOTNULL(cpu_engine_); + // TODO(nhasabni): can we remove dynamic memory allocation? + user_memory_ = new memory(pd, data_buffer); + } + + /// A version of SetUsrMem with primitive descriptor and tensor + void SetUsrMem(memory::primitive_desc pd, const Tensor* tensor) { + CHECK_NOTNULL(tensor); + SetUsrMem(pd, GetTensorBuffer(tensor)); + } + + /// Get function for user memory primitive. + const memory* GetUsrMem() const { return user_memory_; } + + /// Get function for primitive descriptor of user memory primitive. 
+ const memory::primitive_desc GetUsrMemPrimDesc() const { + CHECK_NOTNULL(user_memory_); + return user_memory_->get_primitive_desc(); + } + + /// Get function for descriptor of user memory. + memory::desc GetUsrMemDesc() { + // This is ugly. Why MKL-DNN does not provide desc() method of const type?? + const memory::primitive_desc pd = GetUsrMemPrimDesc(); + return const_cast(&pd)->desc(); + } + + /// Get function for data buffer of user memory primitive. + void* GetUsrMemDataHandle() const { + CHECK_NOTNULL(user_memory_); + return user_memory_->get_data_handle(); + } + + /// Get the memory primitive for input and output of an op. If inputs + /// to an op require reorders, then this function returns memory primitive + /// for reorder. Otherwise, it will return memory primitive for user memory. + /// + /// E.g., Conv2D(I, F) is a primitive with I and F being inputs. Then to + /// execute Conv2D, we need memory primitive for I and F. Buf if reorder is + /// required for I and F (say I_r is reorder primitive for I; F_r is reorder + /// primitive for F), then we need I_r and F_r to perform Conv2D. + const memory& GetOpMem() const { + return reorder_memory_ ? *reorder_memory_ : *user_memory_; + } + + /// Set memory descriptor of an operation in terms of dimensions and memory + /// format. E.g., For Conv2D, the dimensions would be same as user dimensions + /// but memory::format would be mkldnn::any because we want MKL-DNN to choose + /// best layout/format for given input dimensions. + void SetOpMemDesc(const memory::dims& dim, memory::format fm) { + // TODO(nhasabni): can we remove dynamic memory allocation? + op_md_ = new memory::desc(dim, MklDnnType(), fm); + } + + /// Get function for memory descriptor for an operation + const memory::desc& GetOpMemDesc() const { return *op_md_; } + + /// Function to handle input reordering + /// + /// Check if we need to reorder this input of an operation. 
+ /// Return true and allocate reorder memory primitive if reorder is needed. + /// Otherwise, return false and do not allocate reorder memory primitive. + /// + /// To check if reorder is needed, this function compares memory primitive + /// descriptor of an operation (op_pd) for the given input with the + /// user-specified memory primitive descriptor. + /// + /// @input: op_pd - memory primitive descriptor of the given input of an + /// operation + /// @input: net - net to which to add reorder primitive in case it is needed. + /// @return: true in case reorder of input is needed; false, otherwise. + bool CheckReorderToOpMem(const memory::primitive_desc& op_pd, + std::vector* net) { + CHECK_NOTNULL(net); + CHECK_NOTNULL(user_memory_); + if (op_pd != user_memory_->get_primitive_desc()) { + // TODO(nhasabni): can we remove dynamic memory allocation? + reorder_memory_ = new memory(op_pd); + net->push_back(reorder(*user_memory_, *reorder_memory_)); + return true; + } return false; } - bool result = (0 == op_name.compare(GetMklOpName("Add")) || - 0 == op_name.compare(GetMklOpName("Sub")) || - 0 == op_name.compare(GetMklOpName("Mul")) || - 0 == op_name.compare(GetMklOpName("Maximum")) || - 0 == op_name.compare(GetMklOpName("SquaredDifference"))); + /// Function to handle output reorder + /// + /// This function performs very similar functionality as input reordering + /// function above. The only difference is that this function does not add + /// reorder primitive to the net. The reason for this is: the reorder + /// primitive for output needs to be added to the list only after operation + /// has executed. But we need to prepare a temporary buffer in case output + /// reorder is needed. And this temporary buffer will hold the output of + /// an operation before it is fed to reorder primitive. + /// + /// @input memory primitive descriptor for the given output of an operation + /// @return: true in case reorder of output is needed; false, otherwise. 
+ bool PrepareReorderToUserMemIfReq(const memory::primitive_desc& op_pd) { + CHECK_NOTNULL(user_memory_); + if (op_pd != user_memory_->get_primitive_desc()) { + // TODO(nhasabni): can we remove dynamic memory allocation? + reorder_memory_ = new memory(op_pd); + return true; + } + return false; + } - VLOG(1) << "mkl_op_registry::" << op_name - << " is elementwise MKL op: " << result; - return result; -} + /// Function to actually insert reorder primitive in the net + /// + /// This function completes remaining part of output reordering. It inserts + /// a reordering primitive from the temporary buffer that holds the output + /// to the user-specified output buffer. + /// + /// @input: net - net to which to add reorder primitive + void InsertReorderToUserMem(std::vector* net) { + CHECK_NOTNULL(net); + CHECK_NOTNULL(user_memory_); + CHECK_NOTNULL(reorder_memory_); + net->push_back(reorder(*reorder_memory_, *user_memory_)); + } +}; -} // namespace mkl_op_registry +#endif // INTEL_MKL_DNN } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index d8925d3909..e6a4088656 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -429,3 +429,41 @@ Stack Overflow and specify the `tensorflow` tag.
ImportError: cannot import name pywrap_tensorflow
+ +## Tested source configurations +**Linux** + + + + + + + + + + +
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.3.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
tensorflow_gpu-1.3.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.568
tensorflow-1.2.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.5N/AN/A
tensorflow_gpu-1.2.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.55.18
tensorflow-1.1.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.2N/AN/A
tensorflow_gpu-1.1.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.25.18
tensorflow-1.0.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.2N/AN/A
tensorflow_gpu-1.0.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.4.25.18
+ +**Mac** + + + + + + + + +
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.3.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
tensorflow-1.2.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
tensorflow-1.1.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.2N/AN/A
tensorflow_gpu-1.1.0GPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.25.18
tensorflow-1.0.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.2N/AN/A
tensorflow_gpu-1.0.0GPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.25.18
+ +**Windows** + + + + + + + + + + +
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.3.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.3.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.368
tensorflow-1.2.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.2.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.35.18
tensorflow-1.1.0CPU3.5MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.1.0GPU3.5MSVC 2015 update 3Cmake v3.6.35.18
tensorflow-1.0.0CPU3.5MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.0.0GPU3.5MSVC 2015 update 3Cmake v3.6.35.18
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java index eb4dc69d63..184df1bdb4 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java @@ -37,6 +37,7 @@ import android.content.pm.PackageManager; import android.media.AudioFormat; import android.media.AudioRecord; import android.media.MediaRecorder; +import android.os.Build; import android.os.Bundle; import android.util.Log; import android.view.View; @@ -151,12 +152,15 @@ public class SpeechActivity extends Activity { // Start the recording and recognition threads. requestMicrophonePermission(); + startRecording(); startRecognition(); } private void requestMicrophonePermission() { - requestPermissions( - new String[] {android.Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO); + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) { + requestPermissions( + new String[]{android.Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO); + } } @Override diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py index 6d98c7b85d..1fa2b14869 100644 --- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py +++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py @@ -89,7 +89,7 @@ def build_dataset(words, n_words): # Filling 4 global variables: # data - list of codes (integers from 0 to vocabulary_size-1). 
# This is the original text but words are replaced by their codes -# count - map of words(strings) to count of occurences +# count - map of words(strings) to count of occurrences # dictionary - map of words(strings) to their codes(integers) # reverse_dictionary - maps codes(integers) to words(strings) data, count, dictionary, reverse_dictionary = build_dataset(vocabulary, diff --git a/tensorflow/go/example_inception_inference_test.go b/tensorflow/go/example_inception_inference_test.go index 2162fbe484..f84a588899 100644 --- a/tensorflow/go/example_inception_inference_test.go +++ b/tensorflow/go/example_inception_inference_test.go @@ -28,8 +28,8 @@ import ( "os" "path/filepath" - "github.com/tensorflow/tensorflow/tensorflow/go/op" tf "github.com/tensorflow/tensorflow/tensorflow/go" + "github.com/tensorflow/tensorflow/tensorflow/go/op" ) func Example() { diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index a534a0d659..e8fa21a62b 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -92,7 +92,7 @@ func NewTensor(value interface{}) (*Tensor, error) { raw := tensorData(t.c) buf := bytes.NewBuffer(raw[:0:len(raw)]) if dataType != String { - if err := encodeTensor(buf, val); err != nil { + if err := encodeTensor(buf, val, shape); err != nil { return nil, err } if uintptr(buf.Len()) != nbytes { @@ -100,7 +100,7 @@ func NewTensor(value interface{}) (*Tensor, error) { } } else { e := stringEncoder{offsets: buf, data: raw[nflattened*8 : len(raw)], status: newStatus()} - if err := e.encode(reflect.ValueOf(value)); err != nil { + if err := e.encode(reflect.ValueOf(value), shape); err != nil { return nil, err } if int64(buf.Len()) != nflattened*8 { @@ -236,17 +236,11 @@ func shapeAndDataTypeOf(val reflect.Value) (shape []int64, dt DataType, err erro typ := val.Type() for typ.Kind() == reflect.Array || typ.Kind() == reflect.Slice { shape = append(shape, int64(val.Len())) - // If slice elements are slices, verify that all of them have the same size. 
- // Go's type system makes that guarantee for arrays. if val.Len() > 0 { - if val.Type().Elem().Kind() == reflect.Slice { - expected := val.Index(0).Len() - for i := 1; i < val.Len(); i++ { - if val.Index(i).Len() != expected { - return shape, dt, fmt.Errorf("mismatched slice lengths: %d and %d", val.Index(i).Len(), expected) - } - } - } + // In order to check tensor structure properly in general case we need to iterate over all slices of the tensor to check sizes match + // Since we already going to iterate over all elements in encodeTensor() let's + // 1) do the actual check in encodeTensor() to save some cpu cycles here + // 2) assume the shape is represented by lengths of elements with zero index in each dimension val = val.Index(0) } typ = typ.Elem() @@ -302,7 +296,7 @@ func byteSizeOfEncodedStrings(val interface{}) uintptr { // encodeTensor writes v to the specified buffer using the format specified in // c_api.h. Use stringEncoder for String tensors. -func encodeTensor(w *bytes.Buffer, v reflect.Value) error { +func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error { switch v.Kind() { case reflect.Bool: b := byte(0) @@ -318,19 +312,18 @@ func encodeTensor(w *bytes.Buffer, v reflect.Value) error { } case reflect.Array, reflect.Slice: - // If slice elements are slices, verify that all of them have the same size. + // If current dimension is a slice, verify that it has the expected size // Go's type system makes that guarantee for arrays. 
- if v.Len() > 0 && v.Type().Elem().Kind() == reflect.Slice { - expected := v.Index(0).Len() - for i := 1; i < v.Len(); i++ { - if v.Index(i).Len() != expected { - return fmt.Errorf("mismatched slice lengths: %d and %d", v.Index(i).Len(), expected) - } + if v.Kind() == reflect.Slice { + expected := int(shape[0]) + if v.Len() != expected { + return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected) } } + subShape := shape[1:] for i := 0; i < v.Len(); i++ { - err := encodeTensor(w, v.Index(i)) + err := encodeTensor(w, v.Index(i), subShape) if err != nil { return err } @@ -379,7 +372,7 @@ type stringEncoder struct { status *status } -func (e *stringEncoder) encode(v reflect.Value) error { +func (e *stringEncoder) encode(v reflect.Value, shape []int64) error { if v.Kind() == reflect.String { if err := binary.Write(e.offsets, nativeEndian, e.offset); err != nil { return err @@ -395,8 +388,17 @@ func (e *stringEncoder) encode(v reflect.Value) error { C.free(unsafe.Pointer(src)) return e.status.Err() } + + if v.Kind() == reflect.Slice { + expected := int(shape[0]) + if v.Len() != expected { + return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected) + } + } + + subShape := shape[1:] for i := 0; i < v.Len(); i++ { - if err := e.encode(v.Index(i)); err != nil { + if err := e.encode(v.Index(i), subShape); err != nil { return err } } diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index 2fc7553f87..35bd2fd9a5 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -42,6 +42,10 @@ func TestNewTensor(t *testing.T) { {[]int64{2}, []bool{true, false}}, {[]int64{1}, []float64{1}}, {[]int64{1}, [1]float64{1}}, + {[]int64{1, 1}, [1][1]float64{{1}}}, + {[]int64{1, 1, 1}, [1][1][]float64{{{1}}}}, + {[]int64{1, 1, 2}, [1][][2]float64{{{1, 2}}}}, + {[]int64{1, 1, 1, 1}, [1][][1][]float64{{{{1}}}}}, {[]int64{2}, []string{"string", "slice"}}, {[]int64{2}, [2]string{"string", "array"}}, {[]int64{3, 2}, 
[][]float64{{1, 2}, {3, 4}, {5, 6}}}, @@ -74,6 +78,12 @@ func TestNewTensor(t *testing.T) { []uint64{5}, // Mismatched dimensions [][]float32{{1, 2, 3}, {4}}, + // Mismatched dimensions. Should return "mismatched slice lengths" error instead of "BUG" + [][][]float32{{{1, 2}, {3, 4}}, {{1}, {3}}}, + // Mismatched dimensions. Should return error instead of valid tensor + [][][]float32{{{1, 2}, {3, 4}}, {{1}, {3}}, {{1, 2, 3}, {2, 3, 4}}}, + // Mismatched dimensions for strings + [][]string{{"abc"}, {"abcd", "abcd"}}, } for _, test := range tests { diff --git a/tensorflow/java/src/gen/perl/tftypes-runall.pl b/tensorflow/java/src/gen/perl/tftypes-runall.pl index 258c1ff836..a451ce92aa 100644 --- a/tensorflow/java/src/gen/perl/tftypes-runall.pl +++ b/tensorflow/java/src/gen/perl/tftypes-runall.pl @@ -37,4 +37,4 @@ sub locchk { &locchk("$rsrc/tftypes.csv"); system("perl $dir/tftypes.pl -t $rsrc/tftypes.csv $pkg/types"); -# system("perl $dir/tftypes.pl -c $rsrc/tftypes.csv $rsrc/Tensors.java.tmpl > $pkg/op/Tensors.java"); +system("perl $dir/tftypes.pl -c $rsrc/tftypes.csv $rsrc/Tensors.java.tmpl > $pkg/Tensors.java"); diff --git a/tensorflow/java/src/gen/perl/tftypes.pl b/tensorflow/java/src/gen/perl/tftypes.pl index 86867335cb..115723ac8a 100644 --- a/tensorflow/java/src/gen/perl/tftypes.pl +++ b/tensorflow/java/src/gen/perl/tftypes.pl @@ -75,15 +75,23 @@ open (TYPEDESC, $typedesc); my @info = ([]); +sub trim { + (my $ret) = @_; + $ret =~ s/^\s*//g; + $ret =~ s/\s*$//g; + return $ret; +} + while () { chomp; my $line = $_; if ($line =~ m/^TF type/) { next } $line =~ s/\r$//; - (my $name, my $jtype, my $creat, my $default, my $desc) = - split /,/, $line, 5; - $desc =~ s/^ *//g; - $desc =~ s/ *$//g; + my @items = split /,/, $line, 6; + for (my $i = 0; $i <= $#items; $i++) { + $items[$i] = trim $items[$i]; + } + my $jtype = $items[2]; $jtypecount{$jtype}++; if ($jtypecount{$jtype} > 1) { # currently allowing Java types to stand for more than one TF type, but @@ -92,63 
+100,85 @@ while () { # exit 1 } - push @info, [$name, $jtype, $creat, $default, $desc]; + push @info, \@items; +} + +sub article { + (my $s) = @_; + if (substr($s, 0, 1) =~ m/^[aeoiu8]$/i) { + return "an $s" + } else { + return "a $s" + } } for (my $i = 1; $i <= $#info; $i++) { - (my $name, my $jtype, my $creat, my $default, my $desc) = + (my $name, my $builtin, my $jtype, my $creat, my $default, my $desc) = @{$info[$i]}; - my $tfname = "TF".$name; + my $tfname = $name; my $ucname = uc $name; + print STDERR "$name $desc\n"; + if ($option eq '-t') { if ($jtype eq '') { next } + if ($builtin eq 'y') { next } # Generate class declarations # print STDERR "Creating $dirname/$tfname.java\n"; open (CLASSFILE, ">$dirname/$tfname.java") || die "Can't open $tfname.java"; - print CLASSFILE $copyright; - print CLASSFILE "// GENERATED FILE. To update, edit tftypes.pl instead.\n\n"; - - my $fulldesc = $desc; - if (substr($desc, 0, 1) =~ m/^[aeoiu8]$/i) { - $fulldesc = "an $desc" - } else { - $fulldesc = "a $desc" - } - print CLASSFILE "package org.tensorflow.types;\n\n" - ."import org.tensorflow.DataType;\n\n"; + print CLASSFILE $copyright, "\n"; + # print CLASSFILE "// GENERATED FILE. To update, edit tftypes.pl instead.\n\n"; + + my $fulldesc = article($desc); + print CLASSFILE "package org.tensorflow.types;\n\n"; print CLASSFILE "/** Represents $fulldesc. */\n" - ."public class $tfname implements TFType {\n" - ." private $tfname() {}\n" - ." static {\n" - ." Types.typeCodes.put($tfname.class, DataType.$ucname);\n" - ." }\n"; - if ($default ne '') { - print CLASSFILE - " static {\n" - ." Types.scalars.put($tfname.class, $default);\n" - ." }\n"; - } - print CLASSFILE "}\n"; + ."public class $tfname {\n" + ." private $tfname() {\n" + ." 
}\n" + ."}\n"; close(CLASSFILE); } elsif ($option eq '-c') { # Generate creator declarations for Tensors.java if ($jtype ne '' && $creat eq 'y') { - for (my $brackets = ''; length $brackets <= 12; $brackets .= '[]') { + for (my $brackets = '', my $rank = 0; length $brackets <= 12; $brackets .= '[]', $rank++) { + my $datainfo = " * \@param data An array containing the values to put into the new tensor.\n" + ." * The dimensions of the new tensor will match those of the array.\n"; + if ($rank == 0) { + $datainfo = " * \@param data The value to put into the new scalar tensor.\n" + } + + my $trank = $rank; + if ($tfname eq 'String') { + $trank = $rank-1; + next if $trank < 0; + + $datainfo = " * \@param data An array containing the data to put into the new tensor.\n" + ." * String elements are sequences of bytes from the last array dimension.\n"; + } + + + my $intro = ($trank > 0) + ? "Creates a rank-$trank tensor of {\@code $jtype} elements." + : "Creates a scalar tensor containing a single {\@code $jtype} element."; $typeinfo .= - " public static Tensor<$tfname> create($jtype$brackets data) {\n" - ." return Tensor.create(data, $tfname.class);\n" - ." }\n"; + " /**\n" + ." * $intro\n" + ." * \n" + .$datainfo + ." */\n" + ." public static Tensor<$tfname> create($jtype$brackets data) {\n" + ." return Tensor.create(data, $tfname.class);\n" + ." }\n\n"; } } - if ($text =~ m/\b$tfname\b/ || $creat eq 'y') { + if ($text =~ m/\b$tfname\b/ && $builtin eq 'n' && $creat eq 'y') { $imports .= "import org.tensorflow.types.$tfname;\n"; } } } if ($option ne '-t') { - print "// GENERATED FILE. Edits to this file will be lost -- edit $tmpl instead.\n"; +# print "// GENERATED FILE. 
Edits to this file will be lost -- edit $tmpl instead.\n"; $text =~ s/\@TYPEINFO\@/$typeinfo/; $text =~ s/\@IMPORTS\@/$imports/; diff --git a/tensorflow/java/src/gen/resources/Tensors.java.tmpl b/tensorflow/java/src/gen/resources/Tensors.java.tmpl new file mode 100644 index 0000000000..98e1588559 --- /dev/null +++ b/tensorflow/java/src/gen/resources/Tensors.java.tmpl @@ -0,0 +1,31 @@ +package org.tensorflow; + +import static java.nio.charset.StandardCharsets.UTF_8; +import org.tensorflow.Tensor; +@IMPORTS@ + +/** + * Type-safe factory methods for creating {@link Tensor} objects. + */ +public final class Tensors { + private Tensors() {} + + /** Creates a scalar String tensor using the default, UTF-8 encoding. + * + * @param data The string to put into the new scalar tensor. + */ + public static Tensor create(String data) { + return Tensor.create(data.getBytes(UTF_8), String.class); + } + + /** Creates a scalar String tensor using a specified encoding. + * + * @param charset The encoding from String to bytes. + * @param data The string to put into the new scalar tensor. 
+ */ + public static Tensor create(String data, java.nio.charset.Charset charset) { + return Tensor.create(data.getBytes(charset), String.class); + } + +@TYPEINFO@} + diff --git a/tensorflow/java/src/gen/resources/tftypes.csv b/tensorflow/java/src/gen/resources/tftypes.csv index 88acaafd3c..6f26230f27 100644 --- a/tensorflow/java/src/gen/resources/tftypes.csv +++ b/tensorflow/java/src/gen/resources/tftypes.csv @@ -1,21 +1,21 @@ -TF type,Java type,Creator?,Zero value,Description -Float,float,y,0f,32-bit single precision floating point number -Double,double,y,0.0,64-bit double precision floating point number -Int32,int,y,0,32-bit signed integer -UInt8,byte,n,(byte)0,8-bit unsigned integer -Int16,,n,(short)0,16-bit signed integer -Int8,,n,(byte)0,8-bit signed integer -String,byte,n,,arbitrary sequence of bytes -Complex64,,n,,single-precision complex number -Int64,long,y,0L,64-bit signed integer -Bool,boolean,y,false,boolean -QInt8,,n,,quantized int8 -QUInt8,,n,,quantized uint8 -QInt32,,n,,quantized int32 -BFloat16,,n,,float32 truncated to 16 bits. Only for cast ops. -QInt16,,n,,quantized int16 -QUInt16,,n,,quantized uint16 -UInt16,,n,,16-bit unsigned integer -Complex128,,n,,double-precision complex number -Half,,n,, -Resource,,n,, +TF type,Builtin,Java type,Creator?,Zero value,Description +Float,y,float,y,0f,32-bit single precision floating point number +Double,y,double,y,0.0,64-bit double precision floating point number +Integer,y,int,y,0,32-bit signed integer +UInt8,n,byte,n,(byte)0,8-bit unsigned integer +Short,y,,n,(short)0,16-bit signed integer +Byte,y,,n,(byte)0,8-bit signed integer +String,y,byte,y,,arbitrary sequence of bytes +Complex64,n,,n,,single-precision complex number +Long,y,long,y,0L,64-bit signed integer +Boolean,y,boolean,y,false,boolean +QInt8,n,,n,,quantized int8 +QUInt8,n,,n,,quantized uint8 +QInt32,n,,n,,quantized int32 +BFloat16,n,,n,,float32 truncated to 16 bits. Only for cast ops. 
+QInt16,n,,n,,quantized int16 +QUInt16,n,,n,,quantized uint16 +UInt16,n,,n,,16-bit unsigned integer +Complex128,n,,n,,double-precision complex number +Half,n,,n,, +Resource,n,,n,, diff --git a/tensorflow/java/src/main/java/org/tensorflow/DataType.java b/tensorflow/java/src/main/java/org/tensorflow/DataType.java index e67e266ff7..e835101d08 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/DataType.java +++ b/tensorflow/java/src/main/java/org/tensorflow/DataType.java @@ -15,7 +15,13 @@ limitations under the License. package org.tensorflow; -/** Type of elements in a {@link Tensor}. */ +import java.util.HashMap; +import java.util.Map; +import org.tensorflow.types.UInt8; + +/** + * Represents the type of elements in a {@link Tensor} as an enum. + */ public enum DataType { /** 32-bit single precision floating point. */ FLOAT(1), @@ -55,14 +61,41 @@ public enum DataType { } // Cached to avoid copying it - final private static DataType[] values = values(); + private static final DataType[] values = values(); static DataType fromC(int c) { for (DataType t : values) { - if (t.value == c) + if (t.value == c) { return t; + } } throw new IllegalArgumentException( "DataType " + c + " is not recognized in Java (version " + TensorFlow.version() + ")"); } + + /** + * Returns the DataType of a Tensor whose elements have the type specified by class {@code c}. + * + * @param c The class describing the TensorFlow type of interest. 
+ */ + public static DataType fromClass(Class c) { + DataType dtype = typeCodes.get(c); + if (dtype == null) { + throw new IllegalArgumentException( + c.getName() + " objects cannot be used as elements in a TensorFlow Tensor"); + } + return dtype; + } + + private static final Map, DataType> typeCodes = new HashMap<>(); + + static { + typeCodes.put(Float.class, DataType.FLOAT); + typeCodes.put(Double.class, DataType.DOUBLE); + typeCodes.put(Integer.class, DataType.INT32); + typeCodes.put(UInt8.class, DataType.UINT8); + typeCodes.put(Long.class, DataType.INT64); + typeCodes.put(Boolean.class, DataType.BOOL); + typeCodes.put(String.class, DataType.STRING); + } } diff --git a/tensorflow/java/src/main/java/org/tensorflow/Graph.java b/tensorflow/java/src/main/java/org/tensorflow/Graph.java index 58ad3ab193..d4fd3db5f7 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Graph.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Graph.java @@ -81,8 +81,8 @@ public final class Graph implements AutoCloseable { /** * Iterator over all the {@link Operation}s in the graph. * - * The order of iteration is unspecified. Consumers of the iterator will received no notification - * should the underlying graph change during iteration. + *

The order of iteration is unspecified. Consumers of the iterator will receive no + * notification should the underlying graph change during iteration. */ public Iterator operations() { return new OperationIterator(this); @@ -245,7 +245,8 @@ public final class Graph implements AutoCloseable { private static native long operation(long handle, String name); - // This method returns the Operation native handle at index 0 and the new value for pos at index 1 (see TF_GraphNextOperation) + // This method returns the Operation native handle at index 0 and the new value for pos at index 1 + // (see TF_GraphNextOperation) private static native long[] nextOperation(long handle, int position); private static native void importGraphDef(long handle, byte[] graphDef, String prefix) diff --git a/tensorflow/java/src/main/java/org/tensorflow/Input.java b/tensorflow/java/src/main/java/org/tensorflow/Input.java index 8e6685ee0f..13bc463e7d 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Input.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Input.java @@ -34,7 +34,7 @@ package org.tensorflow; * ops.array().concat(0, split); * } */ -public interface Input { +public interface Input { /** * Returns the symbolic handle of a tensor. 
@@ -44,5 +44,5 @@ public interface Input { * * @see OperationBuilder#addInput(Output) */ - Output asOutput(); + Output asOutput(); } diff --git a/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java b/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java index d2d019babb..2b431eebf5 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java +++ b/tensorflow/java/src/main/java/org/tensorflow/NativeLibrary.java @@ -122,8 +122,7 @@ final class NativeLibrary { } private static String extractResource( - InputStream resource, String resourceName, String extractToDirectory) - throws IOException { + InputStream resource, String resourceName, String extractToDirectory) throws IOException { final File dst = new File(extractToDirectory, System.mapLibraryName(resourceName)); dst.deleteOnExit(); final String dstPath = dst.toString(); @@ -184,8 +183,7 @@ final class NativeLibrary { // compatibility. private static File createTemporaryDirectory() { File baseDirectory = new File(System.getProperty("java.io.tmpdir")); - String directoryName - = "tensorflow_native_libraries-" + System.currentTimeMillis() + "-"; + String directoryName = "tensorflow_native_libraries-" + System.currentTimeMillis() + "-"; for (int attempt = 0; attempt < 1000; attempt++) { File temporaryDirectory = new File(baseDirectory, directoryName + attempt); if (temporaryDirectory.mkdir()) { @@ -194,7 +192,8 @@ final class NativeLibrary { } throw new IllegalStateException( "Could not create a temporary directory (tried to make " - + directoryName + "*) to extract TensorFlow native libraries."); + + directoryName + + "*) to extract TensorFlow native libraries."); } private NativeLibrary() {} diff --git a/tensorflow/java/src/main/java/org/tensorflow/Operand.java b/tensorflow/java/src/main/java/org/tensorflow/Operand.java index 695c4c1060..61082e83d5 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Operand.java +++ 
b/tensorflow/java/src/main/java/org/tensorflow/Operand.java @@ -22,19 +22,19 @@ package org.tensorflow; * *

{@code
  * // The "decodeJpeg" operation can be used as an operand to the "cast" operation
- * Operand decodeJpeg = ops.image().decodeJpeg(...);
+ * Operand decodeJpeg = ops.image().decodeJpeg(...);
  * ops.math().cast(decodeJpeg, DataType.FLOAT);
  *
  * // The output "y" of the "unique" operation can be used as an operand to the "cast" operation
- * Output y = ops.array().unique(...).y();
- * ops.math().cast(y, DataType.FLOAT);
+ * Output y = ops.array().unique(...).y();
+ * ops.math().cast(y, Float.class);
  *
  * // The "split" operation can be used as operand list to the "concat" operation
- * Iterable split = ops.array().split(...);
+ * Iterable> split = ops.array().split(...);
  * ops.array().concat(0, split);
  * }
*/ -public interface Operand { +public interface Operand { /** * Returns the symbolic handle of a tensor. @@ -44,5 +44,5 @@ public interface Operand { * * @see OperationBuilder#addInput(Output) */ - Output asOutput(); + Output asOutput(); } diff --git a/tensorflow/java/src/main/java/org/tensorflow/Operation.java b/tensorflow/java/src/main/java/org/tensorflow/Operation.java index ec26309fba..6b82e5780b 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Operation.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Operation.java @@ -98,16 +98,26 @@ public final class Operation { * @param length number of tensors in the list * @return array of {@code Output} */ - public Output[] outputList(int idx, int length) { - Output[] outputs = new Output[length]; + public Output[] outputList(int idx, int length) { + Output[] outputs = new Output[length]; for (int i = 0; i < length; ++i) { outputs[i] = output(idx + i); } return outputs; } - /** Returns a symbolic handle to one of the tensors produced by this operation. */ - public Output output(int idx) { + /** + * Returns a symbolic handle to one of the tensors produced by this operation. + * + *

Warning: Does not check that the type of the tensor matches T. It is recommended to call + * this method with an explicit type parameter rather than letting it be inferred, e.g. {@code + * operation.output(0)} + * + * @param The expected element type of the tensors produced by this output. + * @param idx The index of the output among the outputs produced by this operation. + */ + @SuppressWarnings({"rawtypes", "unchecked"}) + public Output output(int idx) { return new Output(this, idx); } diff --git a/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java b/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java index 15077ce439..9a1b7592b3 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java +++ b/tensorflow/java/src/main/java/org/tensorflow/OperationBuilder.java @@ -63,7 +63,6 @@ public final class OperationBuilder { } } - /** * Returns the builder to create an operation. * @@ -73,7 +72,7 @@ public final class OperationBuilder { * @param input {@link Output} supposed to be the input of the OperationBuilder. * @return the OperationBuilder instance for chaining. 
*/ - public OperationBuilder addInput(Output input) { + public OperationBuilder addInput(Output input) { Graph.Reference r = graph.ref(); try { addInput(unsafeNativeHandle, input.op().getUnsafeNativeHandle(), input.index()); @@ -106,7 +105,7 @@ public final class OperationBuilder { return this; } - public OperationBuilder addInputList(Output[] inputs) { + public OperationBuilder addInputList(Output[] inputs) { Graph.Reference r = graph.ref(); try { long[] opHandles = new long[inputs.length]; @@ -231,7 +230,7 @@ public final class OperationBuilder { return this; } - public OperationBuilder setAttr(String name, Tensor value) { + public OperationBuilder setAttr(String name, Tensor value) { Graph.Reference r = graph.ref(); try { setAttrTensor(unsafeNativeHandle, name, value.getNativeHandle()); @@ -241,10 +240,10 @@ public final class OperationBuilder { return this; } - public OperationBuilder setAttr(String name, Tensor[] value) { + public OperationBuilder setAttr(String name, Tensor[] value) { long[] handles = new long[value.length]; int idx = 0; - for (Tensor t : value) { + for (Tensor t : value) { handles[idx++] = t.getNativeHandle(); } Graph.Reference r = graph.ref(); @@ -266,7 +265,7 @@ public final class OperationBuilder { return this; } - public OperationBuilder setAttr(String name, String[] value) { + public OperationBuilder setAttr(String name, String[] value) { Charset utf8 = Charset.forName("UTF-8"); Object[] objects = new Object[value.length]; for (int i = 0; i < value.length; ++i) { @@ -326,5 +325,4 @@ public final class OperationBuilder { private static native void setAttrShape(long handle, String name, long[] shape, int numDims); private static native void setAttrStringList(long handle, String name, Object[] value); - } diff --git a/tensorflow/java/src/main/java/org/tensorflow/Output.java b/tensorflow/java/src/main/java/org/tensorflow/Output.java index 8dff50fafb..0e17a722ff 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Output.java +++ 
b/tensorflow/java/src/main/java/org/tensorflow/Output.java @@ -20,13 +20,13 @@ import java.util.Objects; /** * A symbolic handle to a tensor produced by an {@link Operation}. * - *

An Output is a symbolic handle to a tensor. The value of the Tensor is computed by executing - * the {@link Operation} in a {@link Session}. + *

An Output is a symbolic handle to a Tensor. The value of the tensor is computed by + * executing the {@link Operation} in a {@link Session}. * *

By implementing the {@link Operand} interface, instances of this class also act as operands to * {@link org.tensorflow.op.Op Op} instances. */ -public final class Output implements Operand { +public final class Output implements Operand { /** Handle to the idx-th output of the Operation {@code op}. */ public Output(Operation op, int idx) { @@ -55,7 +55,7 @@ public final class Output implements Operand { } @Override - public Output asOutput() { + public Output asOutput() { return this; } @@ -69,8 +69,8 @@ public final class Output implements Operand { if (o == this) { return true; } - if (o instanceof Output) { - Output that = (Output) o; + if (o instanceof Output) { + Output that = (Output) o; return index == that.index && operation.equals(that.operation); } return false; diff --git a/tensorflow/java/src/main/java/org/tensorflow/SavedModelBundle.java b/tensorflow/java/src/main/java/org/tensorflow/SavedModelBundle.java index b4591dd869..c8b9126f03 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/SavedModelBundle.java +++ b/tensorflow/java/src/main/java/org/tensorflow/SavedModelBundle.java @@ -27,8 +27,9 @@ package org.tensorflow; public class SavedModelBundle implements AutoCloseable { /** - * Load a saved model from an export directory. The model that is being loaded should be created using - * the Saved Model API. + * Load a saved model from an export directory. The model that is being loaded should be created + * using the Saved Model + * API. * * @param exportDir the directory path containing a saved model. * @param tags the tags identifying the specific metagraphdef to load. 
diff --git a/tensorflow/java/src/main/java/org/tensorflow/Session.java b/tensorflow/java/src/main/java/org/tensorflow/Session.java index 83a300a560..73324f23e6 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Session.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Session.java @@ -127,7 +127,7 @@ public final class Session implements AutoCloseable { * {@code SignatureDef} protocol buffer messages that are included in {@link * SavedModelBundle#metaGraphDef()}. */ - public Runner feed(String operation, Tensor t) { + public Runner feed(String operation, Tensor t) { return feed(parseOutput(operation), t); } @@ -138,7 +138,7 @@ public final class Session implements AutoCloseable { *

Operations in a {@link Graph} can have multiple outputs, {@code index} identifies which * one {@code t} is being provided for. */ - public Runner feed(String operation, int index, Tensor t) { + public Runner feed(String operation, int index, Tensor t) { Operation op = operationByName(operation); if (op != null) { inputs.add(op.output(index)); @@ -151,7 +151,7 @@ public final class Session implements AutoCloseable { * Use {@code t} instead of the Tensor referred to by executing the operation referred to by * {@code output}. */ - public Runner feed(Output o, Tensor t) { + public Runner feed(Output o, Tensor t) { inputs.add(o); inputTensors.add(t); return this; @@ -186,7 +186,7 @@ public final class Session implements AutoCloseable { } /** Makes {@link #run()} return the Tensor referred to by {@code output}. */ - public Runner fetch(Output output) { + public Runner fetch(Output output) { outputs.add(output); return this; } @@ -240,8 +240,11 @@ public final class Session implements AutoCloseable { * easier for the caller to cleanup (perhaps returning something like AutoCloseableList in * SessionTest.java), and (b) Evaluate whether the return value should be a list, or maybe a * {@code Map}? + * + *

TODO(andrewmyers): It would also be good if whatever is returned here made it easier to + * extract output tensors in a type-safe way. */ - public List run() { + public List> run() { return runHelper(false).outputs; } @@ -269,17 +272,17 @@ public final class Session implements AutoCloseable { // It's okay to use Operation.getUnsafeNativeHandle() here since the safety depends on the // validity of the Graph and graphRef ensures that. int idx = 0; - for (Tensor t : inputTensors) { + for (Tensor t : inputTensors) { inputTensorHandles[idx++] = t.getNativeHandle(); } idx = 0; - for (Output o : inputs) { + for (Output o : inputs) { inputOpHandles[idx] = o.op().getUnsafeNativeHandle(); inputOpIndices[idx] = o.index(); idx++; } idx = 0; - for (Output o : outputs) { + for (Output o : outputs) { outputOpHandles[idx] = o.op().getUnsafeNativeHandle(); outputOpIndices[idx] = o.index(); idx++; @@ -306,12 +309,12 @@ public final class Session implements AutoCloseable { } finally { runRef.close(); } - List outputs = new ArrayList(); + List> outputs = new ArrayList>(); for (long h : outputTensorHandles) { try { outputs.add(Tensor.fromHandle(h)); } catch (Exception e) { - for (Tensor t : outputs) { + for (Tensor t : outputs) { t.close(); } outputs.clear(); @@ -355,7 +358,8 @@ public final class Session implements AutoCloseable { return op; } - private Output parseOutput(String opName) { + @SuppressWarnings("rawtypes") + private Output parseOutput(String opName) { int colon = opName.lastIndexOf(':'); if (colon == -1 || colon == opName.length() - 1) { return new Output(operationByName(opName), 0); @@ -369,9 +373,9 @@ public final class Session implements AutoCloseable { } } - private ArrayList inputs = new ArrayList(); - private ArrayList inputTensors = new ArrayList(); - private ArrayList outputs = new ArrayList(); + private ArrayList> inputs = new ArrayList>(); + private ArrayList> inputTensors = new ArrayList>(); + private ArrayList> outputs = new ArrayList>(); private ArrayList 
targets = new ArrayList(); private byte[] runOptions = null; } @@ -388,7 +392,7 @@ public final class Session implements AutoCloseable { */ public static final class Run { /** Tensors from requested fetches. */ - public List outputs; + public List> outputs; /** * (Experimental): Metadata about the run. diff --git a/tensorflow/java/src/main/java/org/tensorflow/Tensor.java b/tensorflow/java/src/main/java/org/tensorflow/Tensor.java index c5ad1ee51c..d4b753628b 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Tensor.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Tensor.java @@ -28,89 +28,117 @@ import java.util.Arrays; import java.util.HashMap; /** - * A typed multi-dimensional array. + * A statically typed multi-dimensional array whose elements are of a type described by T. * *

Instances of a Tensor are not thread-safe. * *

WARNING: Resources consumed by the Tensor object must be explicitly freed by * invoking the {@link #close()} method when the object is no longer needed. For example, using a - * try-with-resources block like: + * try-with-resources block: * *

{@code
- * try(Tensor t = Tensor.create(...)) {
+ * try (Tensor t = Tensor.create(...)) {
  *   doSomethingWith(t);
  * }
  * }
*/ -public final class Tensor implements AutoCloseable { +public final class Tensor implements AutoCloseable { /** - * Create a Tensor from a Java object. + * Creates a Tensor from a Java object. * - *

A Tensor is a multi-dimensional array of elements of a limited set of types ({@link - * DataType}). Thus, not all Java objects can be converted to a Tensor. In particular, {@code obj} - * must be either a primitive (float, double, int, long, boolean) or a multi-dimensional array of - * one of those primitives. For example: + *

A {@code Tensor} is a multi-dimensional array of elements of a limited set of types ({@link + * types}), so not all Java objects can be converted to a {@code Tensor}. In particular, the + * argument {@code obj} must be either a primitive (float, double, int, long, boolean, byte) or a + * multi-dimensional array of one of those primitives. The argument {@code type} specifies how to + * interpret the first argument as a TensorFlow type. For example: * *

{@code
    * // Valid: A 64-bit integer scalar.
-   * Tensor s = Tensor.create(42L);
+   * Tensor<Long> s = Tensor.create(42L, Long.class);
    *
    * // Valid: A 3x2 matrix of floats.
    * float[][] matrix = new float[3][2];
-   * Tensor m = Tensor.create(matrix);
+   * Tensor<Float> m = Tensor.create(matrix, Float.class);
    *
    * // Invalid: Will throw an IllegalArgumentException as an arbitrary Object
    * // does not fit into the TensorFlow type system.
-   * Tensor o = Tensor.create(new Object());
+   * Tensor o = Tensor.create(new Object());
    *
    * // Invalid: Will throw an IllegalArgumentException since there are
    * // a differing number of elements in each row of this 2-D array.
    * int[][] twoD = new int[2][];
    * twoD[0] = new int[1];
    * twoD[1] = new int[2];
-   * Tensor x = Tensor.create(twoD);
+   * Tensor<Integer> x = Tensor.create(twoD, Integer.class);
    * }
* - * {@link DataType#STRING} typed Tensors are multi-dimensionary arrays of arbitrary byte sequences - * and thus have {@code byte[]} and not {@code String}-valued elements. For example: + * {@link String}-typed Tensors are multi-dimensional arrays of arbitrary byte sequences, so can + * be initialized from arrays of {@code byte[]} elements. For example: * *
{@code
-   * // Valid: A DataType.STRING tensor.
-   * Tensor s = Tensor.create(new byte[]{1, 2, 3});
+   * // Valid: A String tensor.
+   * Tensor<String> s = Tensor.create(new byte[]{1, 2, 3}, String.class);
    *
    * // Java Strings will need to be encoded into a byte-sequence.
    * String mystring = "foo";
-   * Tensor s = Tensor.create(mystring.getBytes("UTF-8"));
+   * Tensor<String> s = Tensor.create(mystring.getBytes("UTF-8"), String.class);
    *
-   * // Valid: Matrix of DataType.STRING tensors.
+   * // Valid: Matrix of String tensors.
    * // Each element might have a different length.
    * byte[][][] matrix = new byte[2][2][];
    * matrix[0][0] = "this".getBytes("UTF-8");
    * matrix[0][1] = "is".getBytes("UTF-8");
    * matrix[1][0] = "a".getBytes("UTF-8");
    * matrix[1][1] = "matrix".getBytes("UTF-8");
-   * Tensor m = Tensor.create(matrix);
+   * Tensor<String> m = Tensor.create(matrix, String.class);
    * }
* + * @param obj The object to convert to a Tensor. Note that whether it is compatible with the + * type T is not checked by the type system. For type-safe creation of tensors, use {@link + * Tensors}. + * @param type The class object representing the type T. * @throws IllegalArgumentException if {@code obj} is not compatible with the TensorFlow type - * system, or if obj does not disambiguate between multiple DataTypes. In that case, consider - * using {@link #create(DataType, long[], ByteBuffer)} instead. + * system. */ - public static Tensor create(Object obj) { + @SuppressWarnings("unchecked") + public static Tensor create(Object obj, Class type) { + DataType dtype = DataType.fromClass(type); + if (!objectCompatWithType(obj, dtype)) { + throw new IllegalArgumentException( + "DataType of object does not match T (expected " + + dtype + + ", got " + + dataTypeOf(obj) + + ")"); + } + return (Tensor) create(obj, dtype); + } + + /** + * Creates a tensor from an object whose class is inspected to figure out what the underlying data + * type should be. + * + * @throws IllegalArgumentException if {@code obj} is not compatible with the TensorFlow type + * system. + */ + public static Tensor create(Object obj) { return create(obj, dataTypeOf(obj)); } /** - * Create a Tensor of data type {@code dtype} from a Java object. + * Create a Tensor of data type {@code dtype} from a Java object. Requires the parameter {@code T} + * to match {@code type}, but this condition is not checked. * - * @param dtype the intended tensor data type. It must match the the run-time type of the object. + * @param obj the object supplying the tensor data. + * @param dtype the data type of the tensor to create. It must be compatible with the run-time + * type of the object. 
+ * @return the new tensor */ - static Tensor create(Object obj, DataType dtype) { - Tensor t = new Tensor(); - t.dtype = dtype; + private static Tensor create(Object obj, DataType dtype) { + @SuppressWarnings("rawtypes") + Tensor t = new Tensor(dtype); t.shapeCopy = new long[numDimensions(obj, dtype)]; - assert objectCompatWithType(obj, dtype); fillShape(obj, 0, t.shapeCopy); if (t.dtype != DataType.STRING) { int byteSize = elemByteSize(t.dtype) * numElements(t.shapeCopy); @@ -125,7 +153,7 @@ public final class Tensor implements AutoCloseable { } /** - * Create an {@link DataType#INT32} Tensor with data from the given buffer. + * Create a {@link Integer} Tensor with data from the given buffer. * *

Creates a Tensor with the given shape by copying elements from the buffer (starting from its * current position) into the tensor. For example, if {@code shape = {2,3} } (which represents a @@ -136,14 +164,14 @@ public final class Tensor implements AutoCloseable { * @param data a buffer containing the tensor data. * @throws IllegalArgumentException If the tensor shape is not compatible with the buffer */ - public static Tensor create(long[] shape, IntBuffer data) { - Tensor t = allocateForBuffer(DataType.INT32, shape, data.remaining()); + public static Tensor create(long[] shape, IntBuffer data) { + Tensor t = allocateForBuffer(DataType.INT32, shape, data.remaining()); t.buffer().asIntBuffer().put(data); return t; } /** - * Create a {@link DataType#FLOAT} Tensor with data from the given buffer. + * Create a {@link Float} Tensor with data from the given buffer. * *

Creates a Tensor with the given shape by copying elements from the buffer (starting from its * current position) into the tensor. For example, if {@code shape = {2,3} } (which represents a @@ -154,14 +182,14 @@ public final class Tensor implements AutoCloseable { * @param data a buffer containing the tensor data. * @throws IllegalArgumentException If the tensor shape is not compatible with the buffer */ - public static Tensor create(long[] shape, FloatBuffer data) { - Tensor t = allocateForBuffer(DataType.FLOAT, shape, data.remaining()); + public static Tensor create(long[] shape, FloatBuffer data) { + Tensor t = allocateForBuffer(DataType.FLOAT, shape, data.remaining()); t.buffer().asFloatBuffer().put(data); return t; } /** - * Create a {@link DataType#DOUBLE} Tensor with data from the given buffer. + * Create a {@link Double} Tensor with data from the given buffer. * *

Creates a Tensor with the given shape by copying elements from the buffer (starting from its * current position) into the tensor. For example, if {@code shape = {2,3} } (which represents a @@ -172,14 +200,14 @@ public final class Tensor implements AutoCloseable { * @param data a buffer containing the tensor data. * @throws IllegalArgumentException If the tensor shape is not compatible with the buffer */ - public static Tensor create(long[] shape, DoubleBuffer data) { - Tensor t = allocateForBuffer(DataType.DOUBLE, shape, data.remaining()); + public static Tensor create(long[] shape, DoubleBuffer data) { + Tensor t = allocateForBuffer(DataType.DOUBLE, shape, data.remaining()); t.buffer().asDoubleBuffer().put(data); return t; } /** - * Create an {@link DataType#INT64} Tensor with data from the given buffer. + * Create an {@link Long} Tensor with data from the given buffer. * *

Creates a Tensor with the given shape by copying elements from the buffer (starting from its * current position) into the tensor. For example, if {@code shape = {2,3} } (which represents a @@ -190,47 +218,87 @@ public final class Tensor implements AutoCloseable { * @param data a buffer containing the tensor data. * @throws IllegalArgumentException If the tensor shape is not compatible with the buffer */ - public static Tensor create(long[] shape, LongBuffer data) { - Tensor t = allocateForBuffer(DataType.INT64, shape, data.remaining()); + public static Tensor create(long[] shape, LongBuffer data) { + Tensor t = allocateForBuffer(DataType.INT64, shape, data.remaining()); t.buffer().asLongBuffer().put(data); return t; } /** - * Create a Tensor with data from the given buffer. + * Create a Tensor of any type with data from the given buffer. + * + *

Creates a Tensor with the provided shape of any type where the tensor's data has been + * encoded into {@code data} as per the specification of the TensorFlow C API. + * + * @param the tensor element type + * @param type the tensor element type, represented as a class object. + * @param shape the tensor shape. + * @param data a buffer containing the tensor data. + * @throws IllegalArgumentException If the tensor datatype or shape is not compatible with the + * buffer + */ + public static Tensor create(Class type, long[] shape, ByteBuffer data) { + @SuppressWarnings("unchecked") + Tensor ret = (Tensor) create(DataType.fromClass(type), shape, data); + return ret; + } + + /** + * Creates a Tensor of any type with data from the given buffer. * *

Creates a Tensor with the provided shape of any type where the tensor's data has been * encoded into {@code data} as per the specification of the TensorFlow C API. * - * @param dataType the tensor datatype. + * @param The tensor element type + * @param type the tensor element type, specified as a DataType. This must agree with T. * @param shape the tensor shape. * @param data a buffer containing the tensor data. * @throws IllegalArgumentException If the tensor datatype or shape is not compatible with the * buffer */ - public static Tensor create(DataType dataType, long[] shape, ByteBuffer data) { + private static Tensor create(DataType dtype, long[] shape, ByteBuffer data) { int nremaining = 0; - if (dataType != DataType.STRING) { - int elemBytes = elemByteSize(dataType); + if (dtype != DataType.STRING) { + int elemBytes = elemByteSize(dtype); if (data.remaining() % elemBytes != 0) { throw new IllegalArgumentException( String.format( "ByteBuffer with %d bytes is not compatible with a %s Tensor (%d bytes/element)", - data.remaining(), dataType.toString(), elemBytes)); + data.remaining(), dtype.toString(), elemBytes)); } nremaining = data.remaining() / elemBytes; } else { nremaining = data.remaining(); } - Tensor t = allocateForBuffer(dataType, shape, nremaining); + Tensor t = allocateForBuffer(dtype, shape, nremaining); t.buffer().put(data); return t; } + /** + * Returns this Tensor object with the type {@code Tensor}. This method is useful when given a + * value of type {@code Tensor}. + * + * @param type any (non-null) array of the correct type. + * @throws IllegalArgumentException if the actual data type of this object does not match the type + * {@code U}. 
+ */ + @SuppressWarnings("unchecked") + public Tensor expect(Class type) { + DataType dt = DataType.fromClass(type); + if (!dt.equals(dtype)) { + throw new IllegalArgumentException( + "Cannot cast from tensor of " + dtype + " to tensor of " + dt); + } + return ((Tensor) this); + } + // Helper function to allocate a Tensor for the create() methods that create a Tensor from // a java.nio.Buffer. - private static Tensor allocateForBuffer(DataType dataType, long[] shape, int nBuffered) { + // Requires: dataType matches T + private static Tensor allocateForBuffer(DataType dataType, long[] shape, int nBuffered) { final int nflattened = numElements(shape); int nbytes = 0; if (dataType != DataType.STRING) { @@ -242,8 +310,7 @@ public final class Tensor implements AutoCloseable { // DT_STRING tensor encoded in a ByteBuffer. nbytes = nBuffered; } - Tensor t = new Tensor(); - t.dtype = dataType; + Tensor t = new Tensor(dataType); t.shapeCopy = Arrays.copyOf(shape, shape.length); t.nativeHandle = allocate(t.dtype.c(), t.shapeCopy, nbytes); return t; @@ -300,7 +367,7 @@ public final class Tensor implements AutoCloseable { } /** - * Returns the value in a scalar {@link DataType#FLOAT} tensor. + * Returns the value in a scalar {@link Float} tensor. * * @throws IllegalArgumentException if the Tensor does not represent a float scalar. */ @@ -309,7 +376,7 @@ public final class Tensor implements AutoCloseable { } /** - * Returns the value in a scalar {@link DataType#DOUBLE} tensor. + * Returns the value in a scalar {@link Double} tensor. * * @throws IllegalArgumentException if the Tensor does not represent a double scalar. */ @@ -318,7 +385,7 @@ public final class Tensor implements AutoCloseable { } /** - * Returns the value in a scalar {@link DataType#INT32} tensor. + * Returns the value in a scalar {@link Integer} tensor. * * @throws IllegalArgumentException if the Tensor does not represent a int scalar. 
*/ @@ -327,7 +394,7 @@ public final class Tensor implements AutoCloseable { } /** - * Returns the value in a scalar {@link DataType#INT64} tensor. + * Returns the value in a scalar {@link Long} tensor. * * @throws IllegalArgumentException if the Tensor does not represent a long scalar. */ @@ -336,7 +403,7 @@ public final class Tensor implements AutoCloseable { } /** - * Returns the value in a scalar {@link DataType#BOOL} tensor. + * Returns the value in a scalar {@link Boolean} tensor. * * @throws IllegalArgumentException if the Tensor does not represent a boolean scalar. */ @@ -345,7 +412,7 @@ public final class Tensor implements AutoCloseable { } /** - * Returns the value in a scalar {@link DataType#STRING} tensor. + * Returns the value in a scalar {@link String} tensor. * * @throws IllegalArgumentException if the Tensor does not represent a boolean scalar. */ @@ -377,21 +444,21 @@ public final class Tensor implements AutoCloseable { * @throws IllegalArgumentException if the tensor is a scalar or if {@code dst} is not compatible * with the tensor (for example, mismatched data types or shapes). */ - public T copyTo(T dst) { + public U copyTo(U dst) { throwExceptionIfTypeIsIncompatible(dst); readNDArray(nativeHandle, dst); return dst; } /** - * Write the data of a {@link DataType#INT32} tensor into the given buffer. + * Write the data of a {@link Integer} tensor into the given buffer. * *

Copies {@code numElements()} elements to the buffer. * * @param dst the destination buffer * @throws BufferOverflowException If there is insufficient space in the given buffer for the data * in this tensor - * @throws IllegalArgumentException If the tensor datatype is not {@link DataType#INT32} + * @throws IllegalArgumentException If the tensor data type is not {@link Integer} */ public void writeTo(IntBuffer dst) { if (dtype != DataType.INT32) { @@ -402,14 +469,14 @@ public final class Tensor implements AutoCloseable { } /** - * Write the data of a {@link DataType#FLOAT} tensor into the given buffer. + * Write the data of a {@link Float} tensor into the given buffer. * *

Copies {@code numElements()} elements to the buffer. * * @param dst the destination buffer * @throws BufferOverflowException If there is insufficient space in the given buffer for the data * in this tensor - * @throws IllegalArgumentException If the tensor datatype is not {@link DataType#FLOAT} + * @throws IllegalArgumentException If the tensor datatype is not {@link Float} */ public void writeTo(FloatBuffer dst) { if (dtype != DataType.FLOAT) { @@ -420,14 +487,14 @@ public final class Tensor implements AutoCloseable { } /** - * Write the data of a {@link DataType#DOUBLE} tensor into the given buffer. + * Write the data of a {@link Double} tensor into the given buffer. * *

Copies {@code numElements()} elements to the buffer. * * @param dst the destination buffer * @throws BufferOverflowException If there is insufficient space in the given buffer for the data * in this tensor - * @throws IllegalArgumentException If the tensor datatype is not {@link DataType#DOUBLE} + * @throws IllegalArgumentException If the tensor datatype is not {@link Double} */ public void writeTo(DoubleBuffer dst) { if (dtype != DataType.DOUBLE) { @@ -438,14 +505,14 @@ public final class Tensor implements AutoCloseable { } /** - * Write the data of a {@link DataType#INT64} tensor into the given buffer. + * Write the data of a {@link Long} tensor into the given buffer. * *

Copies {@code numElements()} elements to the buffer. * * @param dst the destination buffer * @throws BufferOverflowException If there is insufficient space in the given buffer for the data * in this tensor - * @throws IllegalArgumentException If the tensor datatype is not {@link DataType#INT64} + * @throws IllegalArgumentException If the tensor datatype is not {@link Long} */ public void writeTo(LongBuffer dst) { if (dtype != DataType.INT64) { @@ -480,9 +547,9 @@ public final class Tensor implements AutoCloseable { * *

Takes ownership of the handle. */ - static Tensor fromHandle(long handle) { - Tensor t = new Tensor(); - t.dtype = DataType.fromC(dtype(handle)); + static Tensor fromHandle(long handle) { + @SuppressWarnings("rawtypes") + Tensor t = new Tensor(DataType.fromC(dtype(handle))); t.shapeCopy = shape(handle); t.nativeHandle = handle; return t; @@ -496,7 +563,9 @@ public final class Tensor implements AutoCloseable { private DataType dtype; private long[] shapeCopy = null; - private Tensor() {} + private Tensor(DataType t) { + dtype = t; + } private ByteBuffer buffer() { return buffer(nativeHandle).order(ByteOrder.nativeOrder()); @@ -564,11 +633,26 @@ public final class Tensor implements AutoCloseable { classDataTypes.put(Boolean.class, DataType.BOOL); } - private static DataType dataTypeOf(Object o) { + /** The class for the data type to which Java object o corresponds. */ + private static Class baseObjType(Object o) { Class c = o.getClass(); while (c.isArray()) { c = c.getComponentType(); } + return c; + } + + /** + * The default TensorFlow data type to which Java object o corresponds. Some Java objects + * represent more than one TensorFlow data type; for example, 'byte' can represent both {@code + * uint8} and {@code string}, with the latter being the default interpretation. + */ + private static DataType dataTypeOf(Object o) { + Class c = baseObjType(o); + return dataTypeFromClass(c); + } + + private static DataType dataTypeFromClass(Class c) { DataType ret = classDataTypes.get(c); if (ret != null) { return ret; @@ -577,7 +661,12 @@ public final class Tensor implements AutoCloseable { } /** - * Returns the number of dimensions of a tensor of type dtype when represented by the object o. + * Return the number of dimensions of the tensor that object {@code o} represents as a tensor + * whose datatype is {@code dtype}. Normally this is the same as the number of dimensions of o + * itself, but is one smaller for tensors of strings. + * + * @param o The object to inspect. 
It must be a valid representation of the given data type. + * @param dtype The expected data type of the tensor. */ private static int numDimensions(Object o, DataType dtype) { int ret = numArrayDimensions(o); @@ -624,7 +713,13 @@ public final class Tensor implements AutoCloseable { /** Returns whether the object {@code obj} can represent a tensor with data type {@code dtype}. */ private static boolean objectCompatWithType(Object obj, DataType dtype) { - DataType dto = dataTypeOf(obj); + Class c = baseObjType(obj); + DataType dto = dataTypeFromClass(c); + int nd = numDimensions(obj, dto); + if (!c.isPrimitive() && c != String.class && nd != 0) { + throw new IllegalArgumentException( + "cannot create non-scalar Tensors from arrays of boxed values"); + } if (dto.equals(dtype)) { return true; } diff --git a/tensorflow/java/src/main/java/org/tensorflow/Tensors.java b/tensorflow/java/src/main/java/org/tensorflow/Tensors.java new file mode 100644 index 0000000000..c828d23efc --- /dev/null +++ b/tensorflow/java/src/main/java/org/tensorflow/Tensors.java @@ -0,0 +1,447 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** Type-safe factory methods for creating {@link org.tensorflow.Tensor} objects. 
*/ +public final class Tensors { + private Tensors() {} + + /** + * Creates a scalar String tensor using the default, UTF-8 encoding. + * + * @param data The string to put into the new scalar tensor. + */ + public static Tensor create(String data) { + return Tensor.create(data.getBytes(UTF_8), String.class); + } + + /** + * Creates a scalar String tensor using a specified encoding. + * + * @param charset The encoding from String to bytes. + * @param data The string to put into the new scalar tensor. + */ + public static Tensor create(String data, java.nio.charset.Charset charset) { + return Tensor.create(data.getBytes(charset), String.class); + } + + /** + * Creates a scalar tensor containing a single {@code float} element. + * + * @param data The value to put into the new scalar tensor. + */ + public static Tensor create(float data) { + return Tensor.create(data, Float.class); + } + + /** + * Creates a rank-1 tensor of {@code float} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(float[] data) { + return Tensor.create(data, Float.class); + } + + /** + * Creates a rank-2 tensor of {@code float} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(float[][] data) { + return Tensor.create(data, Float.class); + } + + /** + * Creates a rank-3 tensor of {@code float} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(float[][][] data) { + return Tensor.create(data, Float.class); + } + + /** + * Creates a rank-4 tensor of {@code float} elements. + * + * @param data An array containing the values to put into the new tensor. 
The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(float[][][][] data) { + return Tensor.create(data, Float.class); + } + + /** + * Creates a rank-5 tensor of {@code float} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(float[][][][][] data) { + return Tensor.create(data, Float.class); + } + + /** + * Creates a rank-6 tensor of {@code float} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(float[][][][][][] data) { + return Tensor.create(data, Float.class); + } + + /** + * Creates a scalar tensor containing a single {@code double} element. + * + * @param data The value to put into the new scalar tensor. + */ + public static Tensor create(double data) { + return Tensor.create(data, Double.class); + } + + /** + * Creates a rank-1 tensor of {@code double} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(double[] data) { + return Tensor.create(data, Double.class); + } + + /** + * Creates a rank-2 tensor of {@code double} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(double[][] data) { + return Tensor.create(data, Double.class); + } + + /** + * Creates a rank-3 tensor of {@code double} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. 
+ */ + public static Tensor create(double[][][] data) { + return Tensor.create(data, Double.class); + } + + /** + * Creates a rank-4 tensor of {@code double} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(double[][][][] data) { + return Tensor.create(data, Double.class); + } + + /** + * Creates a rank-5 tensor of {@code double} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(double[][][][][] data) { + return Tensor.create(data, Double.class); + } + + /** + * Creates a rank-6 tensor of {@code double} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(double[][][][][][] data) { + return Tensor.create(data, Double.class); + } + + /** + * Creates a scalar tensor containing a single {@code int} element. + * + * @param data The value to put into the new scalar tensor. + */ + public static Tensor create(int data) { + return Tensor.create(data, Integer.class); + } + + /** + * Creates a rank-1 tensor of {@code int} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(int[] data) { + return Tensor.create(data, Integer.class); + } + + /** + * Creates a rank-2 tensor of {@code int} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(int[][] data) { + return Tensor.create(data, Integer.class); + } + + /** + * Creates a rank-3 tensor of {@code int} elements. 
+ * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(int[][][] data) { + return Tensor.create(data, Integer.class); + } + + /** + * Creates a rank-4 tensor of {@code int} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(int[][][][] data) { + return Tensor.create(data, Integer.class); + } + + /** + * Creates a rank-5 tensor of {@code int} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(int[][][][][] data) { + return Tensor.create(data, Integer.class); + } + + /** + * Creates a rank-6 tensor of {@code int} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(int[][][][][][] data) { + return Tensor.create(data, Integer.class); + } + + /** + * Creates a scalar tensor containing a single {@code byte} element. + * + * @param data An array containing the data to put into the new tensor. String elements are + * sequences of bytes from the last array dimension. + */ + public static Tensor create(byte[] data) { + return Tensor.create(data, String.class); + } + + /** + * Creates a rank-1 tensor of {@code byte} elements. + * + * @param data An array containing the data to put into the new tensor. String elements are + * sequences of bytes from the last array dimension. + */ + public static Tensor create(byte[][] data) { + return Tensor.create(data, String.class); + } + + /** + * Creates a rank-2 tensor of {@code byte} elements. + * + * @param data An array containing the data to put into the new tensor. 
String elements are + * sequences of bytes from the last array dimension. + */ + public static Tensor create(byte[][][] data) { + return Tensor.create(data, String.class); + } + + /** + * Creates a rank-3 tensor of {@code byte} elements. + * + * @param data An array containing the data to put into the new tensor. String elements are + * sequences of bytes from the last array dimension. + */ + public static Tensor create(byte[][][][] data) { + return Tensor.create(data, String.class); + } + + /** + * Creates a rank-4 tensor of {@code byte} elements. + * + * @param data An array containing the data to put into the new tensor. String elements are + * sequences of bytes from the last array dimension. + */ + public static Tensor create(byte[][][][][] data) { + return Tensor.create(data, String.class); + } + + /** + * Creates a rank-5 tensor of {@code byte} elements. + * + * @param data An array containing the data to put into the new tensor. String elements are + * sequences of bytes from the last array dimension. + */ + public static Tensor create(byte[][][][][][] data) { + return Tensor.create(data, String.class); + } + + /** + * Creates a scalar tensor containing a single {@code long} element. + * + * @param data The value to put into the new scalar tensor. + */ + public static Tensor create(long data) { + return Tensor.create(data, Long.class); + } + + /** + * Creates a rank-1 tensor of {@code long} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(long[] data) { + return Tensor.create(data, Long.class); + } + + /** + * Creates a rank-2 tensor of {@code long} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. 
+ */ + public static Tensor create(long[][] data) { + return Tensor.create(data, Long.class); + } + + /** + * Creates a rank-3 tensor of {@code long} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(long[][][] data) { + return Tensor.create(data, Long.class); + } + + /** + * Creates a rank-4 tensor of {@code long} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(long[][][][] data) { + return Tensor.create(data, Long.class); + } + + /** + * Creates a rank-5 tensor of {@code long} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(long[][][][][] data) { + return Tensor.create(data, Long.class); + } + + /** + * Creates a rank-6 tensor of {@code long} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(long[][][][][][] data) { + return Tensor.create(data, Long.class); + } + + /** + * Creates a scalar tensor containing a single {@code boolean} element. + * + * @param data The value to put into the new scalar tensor. + */ + public static Tensor create(boolean data) { + return Tensor.create(data, Boolean.class); + } + + /** + * Creates a rank-1 tensor of {@code boolean} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(boolean[] data) { + return Tensor.create(data, Boolean.class); + } + + /** + * Creates a rank-2 tensor of {@code boolean} elements. 
+ * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(boolean[][] data) { + return Tensor.create(data, Boolean.class); + } + + /** + * Creates a rank-3 tensor of {@code boolean} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(boolean[][][] data) { + return Tensor.create(data, Boolean.class); + } + + /** + * Creates a rank-4 tensor of {@code boolean} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(boolean[][][][] data) { + return Tensor.create(data, Boolean.class); + } + + /** + * Creates a rank-5 tensor of {@code boolean} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. + */ + public static Tensor create(boolean[][][][][] data) { + return Tensor.create(data, Boolean.class); + } + + /** + * Creates a rank-6 tensor of {@code boolean} elements. + * + * @param data An array containing the values to put into the new tensor. The dimensions of the + * new tensor will match those of the array. 
+ */ + public static Tensor create(boolean[][][][][][] data) { + return Tensor.create(data, Boolean.class); + } +} diff --git a/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java b/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java index 19929188a5..489e95c310 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java +++ b/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java @@ -29,6 +29,7 @@ import org.tensorflow.Output; import org.tensorflow.Session; import org.tensorflow.Tensor; import org.tensorflow.TensorFlow; +import org.tensorflow.types.UInt8; /** Sample use of the TensorFlow Java API to label images using a pre-trained model. */ public class LabelImage { @@ -61,17 +62,17 @@ public class LabelImage { readAllLinesOrExit(Paths.get(modelDir, "imagenet_comp_graph_label_strings.txt")); byte[] imageBytes = readAllBytesOrExit(Paths.get(imageFile)); - try (Tensor image = constructAndExecuteGraphToNormalizeImage(imageBytes)) { + try (Tensor image = constructAndExecuteGraphToNormalizeImage(imageBytes)) { float[] labelProbabilities = executeInceptionGraph(graphDef, image); int bestLabelIdx = maxIndex(labelProbabilities); System.out.println( - String.format( - "BEST MATCH: %s (%.2f%% likely)", - labels.get(bestLabelIdx), labelProbabilities[bestLabelIdx] * 100f)); + String.format("BEST MATCH: %s (%.2f%% likely)", + labels.get(bestLabelIdx), + labelProbabilities[bestLabelIdx] * 100f)); } } - private static Tensor constructAndExecuteGraphToNormalizeImage(byte[] imageBytes) { + private static Tensor constructAndExecuteGraphToNormalizeImage(byte[] imageBytes) { try (Graph g = new Graph()) { GraphBuilder b = new GraphBuilder(g); // Some constants specific to the pre-trained model at: @@ -88,28 +89,29 @@ public class LabelImage { // Since the graph is being constructed once per execution here, we can use a constant for the // input image. 
If the graph were to be re-used for multiple input images, a placeholder would // have been more appropriate. - final Output input = b.constant("input", imageBytes); - final Output output = + final Output input = b.constant("input", imageBytes); + final Output output = b.div( b.sub( b.resizeBilinear( b.expandDims( - b.cast(b.decodeJpeg(input, 3), DataType.FLOAT), + b.cast(b.decodeJpeg(input, 3), Float.class), b.constant("make_batch", 0)), b.constant("size", new int[] {H, W})), b.constant("mean", mean)), b.constant("scale", scale)); try (Session s = new Session(g)) { - return s.runner().fetch(output.op().name()).run().get(0); + return s.runner().fetch(output.op().name()).run().get(0).expect(Float.class); } } } - private static float[] executeInceptionGraph(byte[] graphDef, Tensor image) { + private static float[] executeInceptionGraph(byte[] graphDef, Tensor image) { try (Graph g = new Graph()) { g.importGraphDef(graphDef); try (Session s = new Session(g); - Tensor result = s.runner().feed("input", image).fetch("output").run().get(0)) { + Tensor result = + s.runner().feed("input", image).fetch("output").run().get(0).expect(Float.class)) { final long[] rshape = result.shape(); if (result.numDimensions() != 2 || rshape[0] != 1) { throw new RuntimeException( @@ -161,48 +163,71 @@ public class LabelImage { this.g = g; } - Output div(Output x, Output y) { + Output div(Output x, Output y) { return binaryOp("Div", x, y); } - Output sub(Output x, Output y) { + Output sub(Output x, Output y) { return binaryOp("Sub", x, y); } - Output resizeBilinear(Output images, Output size) { - return binaryOp("ResizeBilinear", images, size); + Output resizeBilinear(Output images, Output size) { + return binaryOp3("ResizeBilinear", images, size); } - Output expandDims(Output input, Output dim) { - return binaryOp("ExpandDims", input, dim); + Output expandDims(Output input, Output dim) { + return binaryOp3("ExpandDims", input, dim); } - Output cast(Output value, DataType dtype) { - return 
g.opBuilder("Cast", "Cast").addInput(value).setAttr("DstT", dtype).build().output(0); + Output cast(Output value, Class type) { + DataType dtype = DataType.fromClass(type); + return g.opBuilder("Cast", "Cast") + .addInput(value) + .setAttr("DstT", dtype) + .build() + .output(0); } - Output decodeJpeg(Output contents, long channels) { + Output decodeJpeg(Output contents, long channels) { return g.opBuilder("DecodeJpeg", "DecodeJpeg") .addInput(contents) .setAttr("channels", channels) .build() - .output(0); + .output(0); } - Output constant(String name, Object value) { - try (Tensor t = Tensor.create(value)) { + Output constant(String name, Object value, Class type) { + try (Tensor t = Tensor.create(value, type)) { return g.opBuilder("Const", name) - .setAttr("dtype", t.dataType()) + .setAttr("dtype", DataType.fromClass(type)) .setAttr("value", t) .build() - .output(0); + .output(0); } } + Output constant(String name, byte[] value) { + return this.constant(name, value, String.class); + } - private Output binaryOp(String type, Output in1, Output in2) { - return g.opBuilder(type, type).addInput(in1).addInput(in2).build().output(0); + Output constant(String name, int value) { + return this.constant(name, value, Integer.class); } + Output constant(String name, int[] value) { + return this.constant(name, value, Integer.class); + } + + Output constant(String name, float value) { + return this.constant(name, value, Float.class); + } + + private Output binaryOp(String type, Output in1, Output in2) { + return g.opBuilder(type, type).addInput(in1).addInput(in2).build().output(0); + } + + private Output binaryOp3(String type, Output in1, Output in2) { + return g.opBuilder(type, type).addInput(in1).addInput(in2).build().output(0); + } private Graph g; } } diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/Operands.java b/tensorflow/java/src/main/java/org/tensorflow/op/Operands.java index 5971103d6d..ac48da8032 100644 --- 
a/tensorflow/java/src/main/java/org/tensorflow/op/Operands.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/Operands.java @@ -33,12 +33,12 @@ public final class Operands { * @param inputs an iteration of input operands * @return an array of outputs */ - public static Output[] asOutputs(Iterable inputs) { - List outputList = new ArrayList<>(); - for (Operand input : inputs) { + public static Output[] asOutputs(Iterable> inputs) { + List> outputList = new ArrayList<>(); + for (Operand input : inputs) { outputList.add(input.asOutput()); } - return outputList.toArray(new Output[outputList.size()]); + return outputList.toArray(new Output[outputList.size()]); } // Disabled constructor diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/core/Constant.java b/tensorflow/java/src/main/java/org/tensorflow/op/core/Constant.java index cd7931d3bb..725c81765a 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/op/core/Constant.java +++ b/tensorflow/java/src/main/java/org/tensorflow/op/core/Constant.java @@ -31,7 +31,7 @@ import org.tensorflow.op.annotation.Operator; /** An operator producing a constant value. */ @Operator -public final class Constant extends PrimitiveOp implements Operand { +public final class Constant extends PrimitiveOp implements Operand { /** * Create a constant from a Java object. * @@ -47,8 +47,8 @@ public final class Constant extends PrimitiveOp implements Operand { * @param object a Java object representing the constant. * @see org.tensorflow.Tensor#create(Object) Tensor.create */ - public static Constant create(Scope scope, Object object) { - try (Tensor value = Tensor.create(object)) { + public static Constant create(Scope scope, Object object, Class type) { + try (Tensor value = Tensor.create(object, type)) { return createWithTensor(scope, value); } } @@ -66,8 +66,8 @@ public final class Constant extends PrimitiveOp implements Operand { * @param data a buffer containing the tensor data. 
* @throws IllegalArgumentException If the tensor shape is not compatible with the buffer */ - public static Constant create(Scope scope, long[] shape, IntBuffer data) { - try (Tensor value = Tensor.create(shape, data)) { + public static Constant create(Scope scope, long[] shape, IntBuffer data) { + try (Tensor value = Tensor.create(shape, data)) { return createWithTensor(scope, value); } } @@ -85,8 +85,8 @@ public final class Constant extends PrimitiveOp implements Operand { * @param data a buffer containing the tensor data. * @throws IllegalArgumentException If the tensor shape is not compatible with the buffer */ - public static Constant create(Scope scope, long[] shape, FloatBuffer data) { - try (Tensor value = Tensor.create(shape, data)) { + public static Constant create(Scope scope, long[] shape, FloatBuffer data) { + try (Tensor value = Tensor.create(shape, data)) { return createWithTensor(scope, value); } } @@ -104,8 +104,8 @@ public final class Constant extends PrimitiveOp implements Operand { * @param data a buffer containing the tensor data. * @throws IllegalArgumentException If the tensor shape is not compatible with the buffer */ - public static Constant create(Scope scope, long[] shape, DoubleBuffer data) { - try (Tensor value = Tensor.create(shape, data)) { + public static Constant create(Scope scope, long[] shape, DoubleBuffer data) { + try (Tensor value = Tensor.create(shape, data)) { return createWithTensor(scope, value); } } @@ -123,8 +123,8 @@ public final class Constant extends PrimitiveOp implements Operand { * @param data a buffer containing the tensor data. 
* @throws IllegalArgumentException If the tensor shape is not compatible with the buffer */ - public static Constant create(Scope scope, long[] shape, LongBuffer data) { - try (Tensor value = Tensor.create(shape, data)) { + public static Constant create(Scope scope, long[] shape, LongBuffer data) { + try (Tensor value = Tensor.create(shape, data)) { return createWithTensor(scope, value); } } @@ -143,14 +143,14 @@ public final class Constant extends PrimitiveOp implements Operand { * @throws IllegalArgumentException If the tensor datatype or shape is not compatible with the * buffer */ - public static Constant create(Scope scope, DataType dataType, long[] shape, ByteBuffer data) { - try (Tensor value = Tensor.create(dataType, shape, data)) { + public static Constant create(Scope scope, Class type, long[] shape, ByteBuffer data) { + try (Tensor value = Tensor.create(type, shape, data)) { return createWithTensor(scope, value); } } - private static Constant createWithTensor(Scope scope, Tensor value) { - return new Constant( + private static Constant createWithTensor(Scope scope, Tensor value) { + return new Constant( scope .graph() .opBuilder("Const", scope.makeOpName("Const")) @@ -160,7 +160,7 @@ public final class Constant extends PrimitiveOp implements Operand { } @Override - public Output asOutput() { + public Output asOutput() { return output; } @@ -169,5 +169,5 @@ public final class Constant extends PrimitiveOp implements Operand { output = operation.output(0); } - private final Output output; + private final Output output; } diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/UInt8.java b/tensorflow/java/src/main/java/org/tensorflow/types/UInt8.java new file mode 100644 index 0000000000..0c751aed9f --- /dev/null +++ b/tensorflow/java/src/main/java/org/tensorflow/types/UInt8.java @@ -0,0 +1,21 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.types; + +/** Represents an 8-bit unsigned integer. */ +public class UInt8 { + private UInt8() {} +} diff --git a/tensorflow/java/src/main/java/org/tensorflow/types/package-info.java b/tensorflow/java/src/main/java/org/tensorflow/types/package-info.java index f1410a760e..96018c5366 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/types/package-info.java +++ b/tensorflow/java/src/main/java/org/tensorflow/types/package-info.java @@ -15,13 +15,15 @@ limitations under the License. /** * Defines classes that represent TensorFlow data types. For each possible data type - * that can be used in a tensor, there is a corresponding class in this package that + * that can be used in a tensor, there is a corresponding class that * is used to represent it. For example, the TensorFlow int32 type is represented by - * the type TFInt32 and by the class object TFInt32.class. The former is used to - * support compile-time checking of tensor data types and the latter is used for - * run-time checking of data types. All such classes implement the TFType interface. - * TensorFlow data types are also separately represented by the DataType enum, with - * one enum value per data type. The enum representation should rarely be needed, but - * the Types class can be used to obtain it from the class object representation. 
+ * the type {@link Integer} and by the class object {@code Integer.class}. The former is used to + * support compile-time checking of tensor element types and the latter is used for + * run-time checking of element types. Classes appearing in this package, such as + * UInt8, represent TensorFlow data types for which there is no existing Java equivalent. + * + *

TensorFlow element types are also separately represented by the {@link DataType} enum, with + * one enum value per element type. The enum representation is not usually needed, but + * can be obtained using {@link DataType.fromClass}. */ package org.tensorflow.types; diff --git a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java index 4adc861bf1..c540299bdc 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java @@ -22,7 +22,6 @@ import static org.junit.Assert.assertTrue; import java.util.HashSet; import java.util.Iterator; - import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; diff --git a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java index b3bc3aaef9..6dc233987b 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java @@ -34,8 +34,8 @@ public class OperationBuilderTest { public void failWhenMixingOperationsOnDifferentGraphs() { try (Graph g1 = new Graph(); Graph g2 = new Graph()) { - Output c1 = TestUtil.constant(g1, "C1", 3); - Output c2 = TestUtil.constant(g2, "C2", 3); + Output c1 = TestUtil.constant(g1, "C1", 3); + Output c2 = TestUtil.constant(g2, "C2", 3); TestUtil.addN(g1, c1, c1); try { TestUtil.addN(g2, c1, c2); @@ -48,7 +48,7 @@ public class OperationBuilderTest { @Test public void failOnUseAfterBuild() { try (Graph g = new Graph(); - Tensor t = Tensor.create(1)) { + Tensor t = Tensors.create(1)) { OperationBuilder b = g.opBuilder("Const", "Const").setAttr("dtype", t.dataType()).setAttr("value", t); b.build(); @@ -64,7 +64,7 @@ public class OperationBuilderTest { public void failOnUseAfterGraphClose() { OperationBuilder b = null; try (Graph g = new Graph(); - Tensor t 
= Tensor.create(1)) { + Tensor t = Tensors.create(1)) { b = g.opBuilder("Const", "Const").setAttr("dtype", t.dataType()).setAttr("value", t); } try { @@ -85,7 +85,7 @@ public class OperationBuilderTest { // types that aren't inferred from the input arguments. try (Graph g = new Graph()) { // dtype, tensor attributes. - try (Tensor t = Tensor.create(1)) { + try (Tensor t = Tensors.create(1)) { g.opBuilder("Const", "DataTypeAndTensor") .setAttr("dtype", DataType.INT32) .setAttr("value", t) @@ -101,7 +101,7 @@ public class OperationBuilderTest { assertTrue(hasNode(g, "StringAndBool")); // int (TF "int" attributes are 64-bit signed, so a Java long). g.opBuilder("RandomUniform", "Int") - .addInput(TestUtil.constant(g, "RandomUniformShape", new int[]{1})) + .addInput(TestUtil.constant(g, "RandomUniformShape", new int[] {1})) .setAttr("seed", 10) .setAttr("dtype", DataType.FLOAT) .build(); @@ -127,7 +127,7 @@ public class OperationBuilderTest { @Test public void setAttrShape() { try (Graph g = new Graph()) { - Output n = + Output n = g.opBuilder("Placeholder", "unknown") .setAttr("dtype", DataType.FLOAT) .setAttr("shape", Shape.unknown()) @@ -136,8 +136,7 @@ public class OperationBuilderTest { assertEquals(-1, n.shape().numDimensions()); assertEquals(DataType.FLOAT, n.dataType()); - n = - g.opBuilder("Placeholder", "batch_of_vectors") + n = g.opBuilder("Placeholder", "batch_of_vectors") .setAttr("dtype", DataType.FLOAT) .setAttr("shape", Shape.make(-1, 784)) .build() @@ -153,13 +152,13 @@ public class OperationBuilderTest { public void addControlInput() { try (Graph g = new Graph(); Session s = new Session(g); - Tensor yes = Tensor.create(true); - Tensor no = Tensor.create(false)) { - Output placeholder = TestUtil.placeholder(g, "boolean", DataType.BOOL); + Tensor yes = Tensors.create(true); + Tensor no = Tensors.create(false)) { + Output placeholder = TestUtil.placeholder(g, "boolean", Boolean.class); Operation check = g.opBuilder("Assert", "assert") 
.addInput(placeholder) - .addInputList(new Output[] {placeholder}) + .addInputList(new Output[] {placeholder}) .build(); Operation noop = g.opBuilder("NoOp", "noop").addControlInput(check).build(); diff --git a/tensorflow/java/src/test/java/org/tensorflow/OperationTest.java b/tensorflow/java/src/test/java/org/tensorflow/OperationTest.java index aade375db8..6fe3b3c327 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/OperationTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/OperationTest.java @@ -24,7 +24,6 @@ import static org.junit.Assert.fail; import java.util.Arrays; import java.util.HashSet; import java.util.Set; - import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -104,9 +103,9 @@ public class OperationTest { @Test public void outputEquality() { try (Graph g = new Graph()) { - Output output = TestUtil.constant(g, "c", 1); - Output output1 = output.op().output(0); - Output output2 = g.operation("c").output(0); + Output output = TestUtil.constant(g, "c", 1); + Output output1 = output.op().output(0); + Output output2 = g.operation("c").output(0); assertEquals(output, output1); assertEquals(output.hashCode(), output1.hashCode()); assertEquals(output, output2); @@ -117,10 +116,10 @@ public class OperationTest { @Test public void outputCollection() { try (Graph g = new Graph()) { - Output output = TestUtil.constant(g, "c", 1); - Output output1 = output.op().output(0); - Output output2 = g.operation("c").output(0); - Set ops = new HashSet<>(); + Output output = TestUtil.constant(g, "c", 1); + Output output1 = output.op().output(0); + Output output2 = g.operation("c").output(0); + Set> ops = new HashSet<>(); ops.addAll(Arrays.asList(output, output1, output2)); assertEquals(1, ops.size()); assertTrue(ops.contains(output)); @@ -132,7 +131,7 @@ public class OperationTest { @Test public void outputToString() { try (Graph g = new Graph()) { - Output output = TestUtil.constant(g, "c", new int[] {1}); + Output 
output = TestUtil.constant(g, "c", new int[] {1}); assertNotNull(output.toString()); } } @@ -158,7 +157,7 @@ public class OperationTest { public void outputList() { try (Graph g = new Graph()) { Operation split = TestUtil.split(g, "split", new int[] {0, 1, 2}, 3); - Output[] outputs = split.outputList(1, 2); + Output[] outputs = split.outputList(1, 2); assertNotNull(outputs); assertEquals(2, outputs.length); for (int i = 0; i < outputs.length; ++i) { diff --git a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java index 50bdf351e3..a86b4dd117 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java @@ -35,9 +35,9 @@ public class SessionTest { try (Graph g = new Graph(); Session s = new Session(g)) { TestUtil.transpose_A_times_X(g, new int[][] {{2}, {3}}); - try (Tensor x = Tensor.create(new int[][] {{5}, {7}}); - AutoCloseableList outputs = - new AutoCloseableList(s.runner().feed("X", x).fetch("Y").run())) { + try (Tensor x = Tensors.create(new int[][] {{5}, {7}}); + AutoCloseableList> outputs = + new AutoCloseableList>(s.runner().feed("X", x).fetch("Y").run())) { assertEquals(1, outputs.size()); final int[][] expected = {{31}}; assertArrayEquals(expected, outputs.get(0).copyTo(new int[1][1])); @@ -50,11 +50,11 @@ public class SessionTest { try (Graph g = new Graph(); Session s = new Session(g)) { TestUtil.transpose_A_times_X(g, new int[][] {{2}, {3}}); - Output feed = g.operation("X").output(0); - Output fetch = g.operation("Y").output(0); - try (Tensor x = Tensor.create(new int[][] {{5}, {7}}); - AutoCloseableList outputs = - new AutoCloseableList(s.runner().feed(feed, x).fetch(fetch).run())) { + Output feed = g.operation("X").output(0); + Output fetch = g.operation("Y").output(0); + try (Tensor x = Tensors.create(new int[][] {{5}, {7}}); + AutoCloseableList> outputs = + new 
AutoCloseableList>(s.runner().feed(feed, x).fetch(fetch).run())) { assertEquals(1, outputs.size()); final int[][] expected = {{31}}; assertArrayEquals(expected, outputs.get(0).copyTo(new int[1][1])); @@ -78,14 +78,21 @@ public class SessionTest { .build() .output(0); // Fetch using colon separated names. - try (Tensor fetched = s.runner().fetch("Split:1").run().get(0)) { + try (Tensor fetched = + s.runner().fetch("Split:1").run().get(0).expect(Integer.class)) { final int[] expected = {3, 4}; assertArrayEquals(expected, fetched.copyTo(new int[2])); } // Feed using colon separated names. - try (Tensor fed = Tensor.create(new int[] {4, 3, 2, 1}); - Tensor fetched = - s.runner().feed("Split:0", fed).feed("Split:1", fed).fetch("Add").run().get(0)) { + try (Tensor fed = Tensors.create(new int[] {4, 3, 2, 1}); + Tensor fetched = + s.runner() + .feed("Split:0", fed) + .feed("Split:1", fed) + .fetch("Add") + .run() + .get(0) + .expect(Integer.class)) { final int[] expected = {8, 6, 4, 2}; assertArrayEquals(expected, fetched.copyTo(new int[4])); } @@ -97,7 +104,7 @@ public class SessionTest { try (Graph g = new Graph(); Session s = new Session(g)) { TestUtil.transpose_A_times_X(g, new int[][] {{2}, {3}}); - try (Tensor x = Tensor.create(new int[][] {{5}, {7}})) { + try (Tensor x = Tensors.create(new int[][] {{5}, {7}})) { Session.Run result = s.runner() .feed("X", x) @@ -105,7 +112,7 @@ public class SessionTest { .setOptions(fullTraceRunOptions()) .runAndFetchMetadata(); // Sanity check on outputs. 
- AutoCloseableList outputs = new AutoCloseableList(result.outputs); + AutoCloseableList> outputs = new AutoCloseableList>(result.outputs); assertEquals(1, outputs.size()); final int[][] expected = {{31}}; assertArrayEquals(expected, outputs.get(0).copyTo(new int[1][1])); @@ -117,6 +124,7 @@ public class SessionTest { assertTrue(md.toString(), md.hasStepStats()); */ assertTrue(result.metadata.length > 0); + outputs.close(); } } } @@ -127,11 +135,12 @@ public class SessionTest { Session s = new Session(g)) { TestUtil.constant(g, "c1", 2718); TestUtil.constant(g, "c2", 31415); - AutoCloseableList outputs = - new AutoCloseableList(s.runner().fetch("c2").fetch("c1").run()); + AutoCloseableList> outputs = + new AutoCloseableList>(s.runner().fetch("c2").fetch("c1").run()); assertEquals(2, outputs.size()); assertEquals(31415, outputs.get(0).intValue()); assertEquals(2718, outputs.get(1).intValue()); + outputs.close(); } } diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java index fe46c0184c..3b027700c5 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java @@ -61,7 +61,7 @@ public class ShapeTest { @Test public void nodesInAGraph() { try (Graph g = new Graph()) { - Output n = TestUtil.placeholder(g, "feed", DataType.FLOAT); + Output n = TestUtil.placeholder(g, "feed", Float.class); assertEquals(-1, n.shape().numDimensions()); n = TestUtil.constant(g, "scalar", 3); diff --git a/tensorflow/java/src/test/java/org/tensorflow/TensorTest.java b/tensorflow/java/src/test/java/org/tensorflow/TensorTest.java index 036db04503..6538359d11 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/TensorTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/TensorTest.java @@ -30,6 +30,7 @@ import java.nio.LongBuffer; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import 
org.tensorflow.types.UInt8; /** Unit tests for {@link org.tensorflow.Tensor}. */ @RunWith(JUnit4.class) @@ -47,7 +48,7 @@ public class TensorTest { byte[] strings = "test".getBytes(UTF_8); long[] strings_shape = {}; byte[] strings_; // raw TF_STRING - try (Tensor t = Tensor.create(strings)) { + try (Tensor t = Tensors.create(strings)) { ByteBuffer to = ByteBuffer.allocate(t.numBytes()); t.writeTo(to); strings_ = to.array(); @@ -55,7 +56,7 @@ public class TensorTest { // validate creating a tensor using a byte buffer { - try (Tensor t = Tensor.create(DataType.BOOL, bools_shape, ByteBuffer.wrap(bools_))) { + try (Tensor t = Tensor.create(Boolean.class, bools_shape, ByteBuffer.wrap(bools_))) { boolean[] actual = t.copyTo(new boolean[bools_.length]); for (int i = 0; i < bools.length; ++i) { assertEquals("" + i, bools[i], actual[i]); @@ -63,7 +64,8 @@ public class TensorTest { } // note: the buffer is expected to contain raw TF_STRING (as per C API) - try (Tensor t = Tensor.create(DataType.STRING, strings_shape, ByteBuffer.wrap(strings_))) { + try (Tensor t = + Tensor.create(String.class, strings_shape, ByteBuffer.wrap(strings_))) { assertArrayEquals(strings, t.bytesValue()); } } @@ -72,15 +74,15 @@ public class TensorTest { { ByteBuffer buf = ByteBuffer.allocateDirect(8 * doubles.length).order(ByteOrder.nativeOrder()); buf.asDoubleBuffer().put(doubles); - try (Tensor t = Tensor.create(DataType.DOUBLE, doubles_shape, buf)) { + try (Tensor t = Tensor.create(Double.class, doubles_shape, buf)) { double[] actual = new double[doubles.length]; assertArrayEquals(doubles, t.copyTo(actual), EPSILON); } } // validate shape checking - try (Tensor t = - Tensor.create(DataType.BOOL, new long[bools_.length * 2], ByteBuffer.wrap(bools_))) { + try (Tensor t = + Tensor.create(Boolean.class, new long[bools_.length * 2], ByteBuffer.wrap(bools_))) { fail("should have failed on incompatible buffer"); } catch (IllegalArgumentException e) { // expected @@ -99,7 +101,7 @@ public class 
TensorTest { .asDoubleBuffer() .put(doubles); buf.flip(); - try (Tensor t = Tensor.create(new long[] {doubles.length}, buf)) { + try (Tensor t = Tensor.create(new long[] {doubles.length}, buf)) { double[] actual = new double[doubles.length]; assertArrayEquals(doubles, t.copyTo(actual), EPSILON); } @@ -115,19 +117,19 @@ public class TensorTest { // validate creating a tensor using a typed buffer { - try (Tensor t = Tensor.create(shape, DoubleBuffer.wrap(doubles))) { + try (Tensor t = Tensor.create(shape, DoubleBuffer.wrap(doubles))) { double[] actual = new double[doubles.length]; assertArrayEquals(doubles, t.copyTo(actual), EPSILON); } - try (Tensor t = Tensor.create(shape, FloatBuffer.wrap(floats))) { + try (Tensor t = Tensor.create(shape, FloatBuffer.wrap(floats))) { float[] actual = new float[floats.length]; assertArrayEquals(floats, t.copyTo(actual), EPSILON_F); } - try (Tensor t = Tensor.create(shape, IntBuffer.wrap(ints))) { + try (Tensor t = Tensor.create(shape, IntBuffer.wrap(ints))) { int[] actual = new int[ints.length]; assertArrayEquals(ints, t.copyTo(actual)); } - try (Tensor t = Tensor.create(shape, LongBuffer.wrap(longs))) { + try (Tensor t = Tensor.create(shape, LongBuffer.wrap(longs))) { long[] actual = new long[longs.length]; assertArrayEquals(longs, t.copyTo(actual)); } @@ -135,22 +137,23 @@ public class TensorTest { // validate shape-checking { - try (Tensor t = Tensor.create(new long[doubles.length + 1], DoubleBuffer.wrap(doubles))) { + try (Tensor t = + Tensor.create(new long[doubles.length + 1], DoubleBuffer.wrap(doubles))) { fail("should have failed on incompatible buffer"); } catch (IllegalArgumentException e) { // expected } - try (Tensor t = Tensor.create(new long[floats.length + 1], FloatBuffer.wrap(floats))) { + try (Tensor t = Tensor.create(new long[floats.length + 1], FloatBuffer.wrap(floats))) { fail("should have failed on incompatible buffer"); } catch (IllegalArgumentException e) { // expected } - try (Tensor t = Tensor.create(new 
long[ints.length + 1], IntBuffer.wrap(ints))) { + try (Tensor t = Tensor.create(new long[ints.length + 1], IntBuffer.wrap(ints))) { fail("should have failed on incompatible buffer"); } catch (IllegalArgumentException e) { // expected } - try (Tensor t = Tensor.create(new long[longs.length + 1], LongBuffer.wrap(longs))) { + try (Tensor t = Tensor.create(new long[longs.length + 1], LongBuffer.wrap(longs))) { fail("should have failed on incompatible buffer"); } catch (IllegalArgumentException e) { // expected @@ -166,11 +169,11 @@ public class TensorTest { long[] longs = {1L, 2L, 3L}; boolean[] bools = {true, false, true}; - try (Tensor tints = Tensor.create(ints); - Tensor tfloats = Tensor.create(floats); - Tensor tdoubles = Tensor.create(doubles); - Tensor tlongs = Tensor.create(longs); - Tensor tbools = Tensor.create(bools)) { + try (Tensor tints = Tensors.create(ints); + Tensor tfloats = Tensors.create(floats); + Tensor tdoubles = Tensors.create(doubles); + Tensor tlongs = Tensors.create(longs); + Tensor tbools = Tensors.create(bools)) { // validate that any datatype is readable with ByteBuffer (content, position) { @@ -293,35 +296,35 @@ public class TensorTest { @Test public void scalars() { - try (Tensor t = Tensor.create(2.718f)) { + try (Tensor t = Tensors.create(2.718f)) { assertEquals(DataType.FLOAT, t.dataType()); assertEquals(0, t.numDimensions()); assertEquals(0, t.shape().length); assertEquals(2.718f, t.floatValue(), EPSILON_F); } - try (Tensor t = Tensor.create(3.1415)) { + try (Tensor t = Tensors.create(3.1415)) { assertEquals(DataType.DOUBLE, t.dataType()); assertEquals(0, t.numDimensions()); assertEquals(0, t.shape().length); assertEquals(3.1415, t.doubleValue(), EPSILON); } - try (Tensor t = Tensor.create(-33)) { + try (Tensor t = Tensors.create(-33)) { assertEquals(DataType.INT32, t.dataType()); assertEquals(0, t.numDimensions()); assertEquals(0, t.shape().length); assertEquals(-33, t.intValue()); } - try (Tensor t = Tensor.create(8589934592L)) { + 
try (Tensor t = Tensors.create(8589934592L)) { assertEquals(DataType.INT64, t.dataType()); assertEquals(0, t.numDimensions()); assertEquals(0, t.shape().length); assertEquals(8589934592L, t.longValue()); } - try (Tensor t = Tensor.create(true)) { + try (Tensor t = Tensors.create(true)) { assertEquals(DataType.BOOL, t.dataType()); assertEquals(0, t.numDimensions()); assertEquals(0, t.shape().length); @@ -329,7 +332,7 @@ public class TensorTest { } final byte[] bytes = {1, 2, 3, 4}; - try (Tensor t = Tensor.create(bytes)) { + try (Tensor t = Tensors.create(bytes)) { assertEquals(DataType.STRING, t.dataType()); assertEquals(0, t.numDimensions()); assertEquals(0, t.shape().length); @@ -340,7 +343,7 @@ public class TensorTest { @Test public void nDimensional() { double[] vector = {1.414, 2.718, 3.1415}; - try (Tensor t = Tensor.create(vector)) { + try (Tensor t = Tensors.create(vector)) { assertEquals(DataType.DOUBLE, t.dataType()); assertEquals(1, t.numDimensions()); assertArrayEquals(new long[] {3}, t.shape()); @@ -350,7 +353,7 @@ public class TensorTest { } int[][] matrix = {{1, 2, 3}, {4, 5, 6}}; - try (Tensor t = Tensor.create(matrix)) { + try (Tensor t = Tensors.create(matrix)) { assertEquals(DataType.INT32, t.dataType()); assertEquals(2, t.numDimensions()); assertArrayEquals(new long[] {2, 3}, t.shape()); @@ -362,7 +365,7 @@ public class TensorTest { long[][][] threeD = { {{1}, {3}, {5}, {7}, {9}}, {{2}, {4}, {6}, {8}, {0}}, }; - try (Tensor t = Tensor.create(threeD)) { + try (Tensor t = Tensors.create(threeD)) { assertEquals(DataType.INT64, t.dataType()); assertEquals(3, t.numDimensions()); assertArrayEquals(new long[] {2, 5, 1}, t.shape()); @@ -376,7 +379,7 @@ public class TensorTest { {{{false, false, true, true}, {false, true, false, false}}}, {{{false, true, false, true}, {false, true, true, false}}}, }; - try (Tensor t = Tensor.create(fourD)) { + try (Tensor t = Tensors.create(fourD)) { assertEquals(DataType.BOOL, t.dataType()); assertEquals(4, 
t.numDimensions()); assertArrayEquals(new long[] {3, 1, 2, 4}, t.shape()); @@ -394,7 +397,7 @@ public class TensorTest { matrix[i][j] = String.format("(%d, %d) = %d", i, j, i << j).getBytes(UTF_8); } } - try (Tensor t = Tensor.create(matrix)) { + try (Tensor t = Tensors.create(matrix)) { assertEquals(DataType.STRING, t.dataType()); assertEquals(2, t.numDimensions()); assertArrayEquals(new long[] {4, 3}, t.shape()); @@ -412,14 +415,24 @@ public class TensorTest { @Test public void testUInt8Tensor() { - byte[] vector = new byte[] { 1, 2, 3, 4 }; - try (Tensor t = Tensor.create(vector, DataType.UINT8)) { + byte[] vector = new byte[] {1, 2, 3, 4}; + try (Tensor t = Tensor.create(vector, UInt8.class)) { assertEquals(DataType.UINT8, t.dataType()); assertEquals(1, t.numDimensions()); assertArrayEquals(new long[] {4}, t.shape()); byte[] got = t.copyTo(new byte[4]); - assertArrayEquals(got, vector); + assertArrayEquals(vector, got); + } + } + + @Test + public void testCreateFromArrayOfBoxed() { + Integer[] vector = new Integer[] {1, 2, 3, 4}; + try (Tensor t = Tensor.create(vector, Integer.class)) { + fail("Tensor.create() should fail because it was given an array of boxed values"); + } catch (IllegalArgumentException e) { + // The expected exception } } @@ -431,7 +444,7 @@ public class TensorTest { invalid[x][y] = new int[x + y + 1]; } } - try (Tensor t = Tensor.create(invalid)) { + try (Tensor t = Tensor.create(invalid)) { fail("Tensor.create() should fail because of differing sizes in the 3rd dimension"); } catch (IllegalArgumentException e) { // The expected exception. 
@@ -440,7 +453,7 @@ public class TensorTest { @Test public void failCopyToOnIncompatibleDestination() { - try (final Tensor matrix = Tensor.create(new int[][] {{1, 2}, {3, 4}})) { + try (final Tensor matrix = Tensors.create(new int[][] {{1, 2}, {3, 4}})) { try { matrix.copyTo(new int[2]); fail("should have failed on dimension mismatch"); @@ -466,7 +479,7 @@ public class TensorTest { @Test public void failCopyToOnScalar() { - try (final Tensor scalar = Tensor.create(3)) { + try (final Tensor scalar = Tensors.create(3)) { try { scalar.copyTo(3); fail("copyTo should fail on scalar tensors, suggesting use of primitive accessors instead"); @@ -478,8 +491,8 @@ public class TensorTest { @Test public void failOnArbitraryObject() { - try (Tensor t = Tensor.create(new Object())) { - fail("should fail on creating a Tensor with a Java object that has not equivalent DataType"); + try (Tensor t = Tensor.create(new Object())) { + fail("should fail on creating a Tensor with a Java object that has no equivalent DataType"); } catch (IllegalArgumentException e) { // The expected exception. } @@ -487,7 +500,7 @@ public class TensorTest { @Test public void failOnZeroDimension() { - try (Tensor t = Tensor.create(new int[3][0][1])) { + try (Tensor t = Tensors.create(new int[3][0][1])) { fail("should fail on creating a Tensor where one of the dimensions is 0"); } catch (IllegalArgumentException e) { // The expected exception. @@ -497,7 +510,7 @@ public class TensorTest { @Test public void useAfterClose() { int n = 4; - Tensor t = Tensor.create(n); + Tensor t = Tensor.create(n); t.close(); try { t.intValue(); @@ -515,8 +528,8 @@ public class TensorTest { // An exception is made for this test, where the pitfalls of this is avoided by not calling // close() on both Tensors. 
final float[][] matrix = {{1, 2, 3}, {4, 5, 6}}; - try (Tensor src = Tensor.create(matrix)) { - Tensor cpy = Tensor.fromHandle(src.getNativeHandle()); + try (Tensor src = Tensors.create(matrix)) { + Tensor cpy = Tensor.fromHandle(src.getNativeHandle()).expect(Float.class); assertEquals(src.dataType(), cpy.dataType()); assertEquals(src.numDimensions(), cpy.numDimensions()); assertArrayEquals(src.shape(), cpy.shape()); diff --git a/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java b/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java index e3415a696d..c973b5a3d8 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java +++ b/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java @@ -19,33 +19,36 @@ import java.lang.reflect.Array; /** Static utility functions. */ public class TestUtil { - public static Output constant(Graph g, String name, Object value) { - try (Tensor t = Tensor.create(value)) { + public static Output constant(Graph g, String name, Object value) { + try (Tensor t = Tensor.create(value)) { return g.opBuilder("Const", name) .setAttr("dtype", t.dataType()) .setAttr("value", t) .build() - .output(0); + .output(0); } } - public static Output placeholder(Graph g, String name, DataType dtype) { - return g.opBuilder("Placeholder", name).setAttr("dtype", dtype).build().output(0); + public static Output placeholder(Graph g, String name, Class type) { + return g.opBuilder("Placeholder", name) + .setAttr("dtype", DataType.fromClass(type)) + .build() + .output(0); } - public static Output addN(Graph g, Output... inputs) { + public static Output addN(Graph g, Output... 
inputs) { return g.opBuilder("AddN", "AddN").addInputList(inputs).build().output(0); } - public static Output matmul( - Graph g, String name, Output a, Output b, boolean transposeA, boolean transposeB) { + public static Output matmul( + Graph g, String name, Output a, Output b, boolean transposeA, boolean transposeB) { return g.opBuilder("MatMul", name) .addInput(a) .addInput(b) .setAttr("transpose_a", transposeA) .setAttr("transpose_b", transposeB) .build() - .output(0); + .output(0); } public static Operation split(Graph g, String name, int[] values, int numSplit) { @@ -57,7 +60,8 @@ public class TestUtil { } public static void transpose_A_times_X(Graph g, int[][] a) { - matmul(g, "Y", constant(g, "A", a), placeholder(g, "X", DataType.INT32), true, false); + Output aa = constant(g, "A", a); + matmul(g, "Y", aa, placeholder(g, "X", Integer.class), true, false); } /** diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/OperandsTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/OperandsTest.java index 4fdd150acc..79bfcc8354 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/op/OperandsTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/op/OperandsTest.java @@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -36,8 +36,9 @@ public class OperandsTest { public void createOutputArrayFromOperandList() { try (Graph g = new Graph()) { Operation split = TestUtil.split(g, "split", new int[] {0, 1, 2}, 3); - List list = Arrays.asList(split.output(0), split.output(2)); - Output[] array = Operands.asOutputs(list); + List> list = + Arrays.asList(split.output(0), split.output(2)); + Output[] array = Operands.asOutputs(list); assertEquals(list.size(), array.length); assertSame(array[0], list.get(0)); assertSame(array[1], list.get(1)); diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/PrimitiveOpTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/PrimitiveOpTest.java index b24bf5a476..e02c38ed22 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/op/PrimitiveOpTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/op/PrimitiveOpTest.java @@ -36,7 +36,7 @@ public class PrimitiveOpTest { @Test public void equalsHashcode() { try (Graph g = new Graph()) { - Output array = TestUtil.constant(g, "array", new int[2]); + Output array = TestUtil.constant(g, "array", new int[2]); PrimitiveOp test1 = new PrimitiveOp(g.opBuilder("Shape", "shape1").addInput(array).build()) {}; diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java index 9256cb281d..125de73554 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/op/ScopeTest.java @@ -19,6 +19,8 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.fail; +import java.util.HashMap; +import java.util.Map; import org.junit.Test; import 
org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -26,6 +28,8 @@ import org.tensorflow.Graph; import org.tensorflow.Output; import org.tensorflow.Session; import org.tensorflow.Tensor; +import org.tensorflow.Tensors; +import org.tensorflow.types.UInt8; /** Unit tests for {@link org.tensorflow.Scope}. */ @RunWith(JUnit4.class) @@ -122,13 +126,13 @@ public class ScopeTest { public void basic() { try (Graph g = new Graph()) { Scope s = new Scope(g); - Const c1 = Const.create(s, 42); + Const c1 = Const.create(s, 42); assertEquals("Const", c1.output().op().name()); - Const c2 = Const.create(s, 7); + Const c2 = Const.create(s, 7); assertEquals("Const_1", c2.output().op().name()); - Const c3 = Const.create(s.withName("four"), 4); + Const c3 = Const.create(s.withName("four"), 4); assertEquals("four", c3.output().op().name()); - Const c4 = Const.create(s.withName("four"), 4); + Const c4 = Const.create(s.withName("four"), 4); assertEquals("four_1", c4.output().op().name()); } } @@ -148,122 +152,164 @@ public class ScopeTest { try (Graph g = new Graph(); Session sess = new Session(g)) { Scope s = new Scope(g); - Output data = Const.create(s.withName("data"), new int[] {600, 470, 170, 430, 300}).output(); + Output data = + Const.create(s.withName("data"), new int[] {600, 470, 170, 430, 300}).output(); // Create a composite op with a customized name - Variance var1 = Variance.create(s.withName("example"), data); + Variance var1 = Variance.create(s.withName("example"), data, Integer.class); assertEquals("example/variance", var1.output().op().name()); // Confirm internally added ops have the right names. 
assertNotNull(g.operation("example/squared_deviation")); assertNotNull(g.operation("example/Mean")); - assertNotNull(g.operation("example/zero")); + // assertNotNull(g.operation("example/zero")); // Same composite op with a default name - Variance var2 = Variance.create(s, data); + Variance var2 = Variance.create(s, data, Integer.class); assertEquals("variance/variance", var2.output().op().name()); // Confirm internally added ops have the right names. assertNotNull(g.operation("variance/squared_deviation")); assertNotNull(g.operation("variance/Mean")); - assertNotNull(g.operation("variance/zero")); + // assertNotNull(g.operation("variance/zero")); // Verify correct results as well. - Tensor result = sess.runner().fetch(var1.output()).run().get(0); + Tensor result = + sess.runner().fetch(var1.output()).run().get(0).expect(Integer.class); assertEquals(21704, result.intValue()); - result = sess.runner().fetch(var2.output()).run().get(0); + result = sess.runner().fetch(var2.output()).run().get(0).expect(Integer.class); assertEquals(21704, result.intValue()); } } // "handwritten" sample operator classes - private static final class Const { - private final Output output; + private static final class Const { + private final Output output; - static Const create(Scope s, Object v) { - try (Tensor value = Tensor.create(v)) { - return new Const( + static Const create(Scope s, int v) { + return create(s, Tensors.create(v)); + } + + static Const create(Scope s, int[] v) { + return create(s, Tensors.create(v)); + } + + static Const create(Scope s, Tensor value) { + return new Const( + s.graph() + .opBuilder("Const", s.makeOpName("Const")) + .setAttr("dtype", value.dataType()) + .setAttr("value", value) + .build() + .output(0)); + } + + static Const create(Scope s, Object v, Class type) { + try (Tensor value = Tensor.create(v, type)) { + return new Const( s.graph() .opBuilder("Const", s.makeOpName("Const")) .setAttr("dtype", value.dataType()) .setAttr("value", value) .build() - 
.output(0)); + .output(0)); } } - Const(Output o) { + Const(Output o) { output = o; } - Output output() { + Output output() { return output; } } - private static final class Mean { - private final Output output; + private static final class Mean { + private final Output output; - static Mean create(Scope s, Output input, Output reductionIndices) { - return new Mean( + static Mean create(Scope s, Output input, Output reductionIndices) { + return new Mean( s.graph() .opBuilder("Mean", s.makeOpName("Mean")) .addInput(input) .addInput(reductionIndices) .build() - .output(0)); + .output(0)); } - Mean(Output o) { + Mean(Output o) { output = o; } - Output output() { + Output output() { return output; } } - private static final class SquaredDifference { - private final Output output; + private static final class SquaredDifference { + private final Output output; - static SquaredDifference create(Scope s, Output x, Output y) { - return new SquaredDifference( + static SquaredDifference create(Scope s, Output x, Output y) { + return new SquaredDifference( s.graph() .opBuilder("SquaredDifference", s.makeOpName("SquaredDifference")) .addInput(x) .addInput(y) .build() - .output(0)); + .output(0)); } - SquaredDifference(Output o) { + SquaredDifference(Output o) { output = o; } - Output output() { + Output output() { return output; } } - private static final class Variance { - private final Output output; + /** + * Returns the zero value of type described by {@code c}, or null if the type (e.g., string) is + * not numeric and therefore has no zero value. + * + * @param c The class describing the TensorFlow type of interest. 
+ */ + public static Object zeroValue(Class c) { + return zeros.get(c); + } + + private static final Map, Object> zeros = new HashMap<>(); + + static { + zeros.put(Float.class, 0.0f); + zeros.put(Double.class, 0.0); + zeros.put(Integer.class, 0); + zeros.put(UInt8.class, (byte) 0); + zeros.put(Long.class, 0L); + zeros.put(Boolean.class, false); + zeros.put(String.class, null); // no zero value + } + + private static final class Variance { + private final Output output; - static Variance create(Scope base, Output x) { + static Variance create(Scope base, Output x, Class type) { Scope s = base.withSubScope("variance"); - Output zero = Const.create(s.withName("zero"), new int[] {0}).output(); - Output sqdiff = + Output zero = Const.create(base, zeroValue(type), type).output(); + Output sqdiff = SquaredDifference.create( s.withName("squared_deviation"), x, Mean.create(s, x, zero).output()) .output(); - return new Variance(Mean.create(s.withName("variance"), sqdiff, zero).output()); + return new Variance(Mean.create(s.withName("variance"), sqdiff, zero).output()); } - Variance(Output o) { + Variance(Output o) { output = o; } - Output output() { + Output output() { return output; } } diff --git a/tensorflow/java/src/test/java/org/tensorflow/op/core/ConstantTest.java b/tensorflow/java/src/test/java/org/tensorflow/op/core/ConstantTest.java index ec23792485..ca54214e06 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/op/core/ConstantTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/op/core/ConstantTest.java @@ -29,7 +29,6 @@ import java.nio.LongBuffer; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; -import org.tensorflow.DataType; import org.tensorflow.Graph; import org.tensorflow.Session; import org.tensorflow.Tensor; @@ -47,8 +46,9 @@ public class ConstantTest { try (Graph g = new Graph(); Session sess = new Session(g)) { Scope scope = new Scope(g); - Constant op = Constant.create(scope, shape, 
IntBuffer.wrap(ints)); - Tensor result = sess.runner().fetch(op.asOutput()).run().get(0); + Constant op = Constant.create(scope, shape, IntBuffer.wrap(ints)); + Tensor result = sess.runner().fetch(op.asOutput()) + .run().get(0).expect(Integer.class); int[] actual = new int[ints.length]; assertArrayEquals(ints, result.copyTo(actual)); } @@ -62,8 +62,8 @@ public class ConstantTest { try (Graph g = new Graph(); Session sess = new Session(g)) { Scope scope = new Scope(g); - Constant op = Constant.create(scope, shape, FloatBuffer.wrap(floats)); - Tensor result = sess.runner().fetch(op.asOutput()).run().get(0); + Constant op = Constant.create(scope, shape, FloatBuffer.wrap(floats)); + Tensor result = sess.runner().fetch(op.asOutput()).run().get(0).expect(Float.class); float[] actual = new float[floats.length]; assertArrayEquals(floats, result.copyTo(actual), EPSILON); } @@ -77,8 +77,8 @@ public class ConstantTest { try (Graph g = new Graph(); Session sess = new Session(g)) { Scope scope = new Scope(g); - Constant op = Constant.create(scope, shape, DoubleBuffer.wrap(doubles)); - Tensor result = sess.runner().fetch(op.asOutput()).run().get(0); + Constant op = Constant.create(scope, shape, DoubleBuffer.wrap(doubles)); + Tensor result = sess.runner().fetch(op.asOutput()).run().get(0).expect(Double.class); double[] actual = new double[doubles.length]; assertArrayEquals(doubles, result.copyTo(actual), EPSILON); } @@ -92,8 +92,8 @@ public class ConstantTest { try (Graph g = new Graph(); Session sess = new Session(g)) { Scope scope = new Scope(g); - Constant op = Constant.create(scope, shape, LongBuffer.wrap(longs)); - Tensor result = sess.runner().fetch(op.asOutput()).run().get(0); + Constant op = Constant.create(scope, shape, LongBuffer.wrap(longs)); + Tensor result = sess.runner().fetch(op.asOutput()).run().get(0).expect(Long.class); long[] actual = new long[longs.length]; assertArrayEquals(longs, result.copyTo(actual)); } @@ -123,8 +123,8 @@ public class ConstantTest { try 
(Graph g = new Graph(); Session sess = new Session(g)) { Scope scope = new Scope(g); - Constant op = Constant.create(scope, DataType.STRING, shape, ByteBuffer.wrap(content)); - Tensor result = sess.runner().fetch(op.asOutput()).run().get(0); + Constant op = Constant.create(scope, String.class, shape, ByteBuffer.wrap(content)); + Tensor result = sess.runner().fetch(op.asOutput()).run().get(0).expect(String.class); assertArrayEquals(data, result.bytesValue()); } } diff --git a/tensorflow/python/debug/lib/debug_graphs.py b/tensorflow/python/debug/lib/debug_graphs.py index 486e659158..87033d53a4 100644 --- a/tensorflow/python/debug/lib/debug_graphs.py +++ b/tensorflow/python/debug/lib/debug_graphs.py @@ -231,8 +231,8 @@ def _infer_device_name(graph_def): break if device_name is None: logging.warn( - "Failed to infer device name from partiton GraphDef: none of the nodes " - "of the GraphDef has a non-empty device name.") + "Failed to infer device name from partition GraphDef: none of the " + "nodes of the GraphDef has a non-empty device name.") return device_name diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py index d7fe4bbfa1..c0a287e922 100644 --- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py +++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py @@ -49,7 +49,7 @@ except ImportError: def _fill_array(arr, seq, fillvalue=0): """ Recursively fills padded arr with elements from seq. - If lenght of seq is less then arr padded length, fillvalue used. + If length of seq is less than arr padded length, fillvalue used. Args: arr: Padded tensor of shape [batch_size, ..., max_padded_dim_len]. 
diff --git a/tensorflow/python/keras/_impl/keras/engine/topology_test.py b/tensorflow/python/keras/_impl/keras/engine/topology_test.py index 97bef2965c..32e692ba7c 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology_test.py @@ -200,7 +200,7 @@ class TopologyConstructionTest(test.TestCase): with self.assertRaises(ValueError): _ = keras.layers.Input(shape=(32,), batch_shape=(10, 32)) with self.assertRaises(ValueError): - _ = keras.layers.Input(shape=(32,), unknwon_kwarg=None) + _ = keras.layers.Input(shape=(32,), unknown_kwarg=None) self.assertListEqual(a.get_shape().as_list(), [None, 32]) a_layer, a_node_index, a_tensor_index = a._keras_history diff --git a/tensorflow/python/kernel_tests/conv2d_transpose_test.py b/tensorflow/python/kernel_tests/conv2d_transpose_test.py index 18184a0ee0..7d0bc54b69 100644 --- a/tensorflow/python/kernel_tests/conv2d_transpose_test.py +++ b/tensorflow/python/kernel_tests/conv2d_transpose_test.py @@ -24,8 +24,12 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.client import device_lib from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test @@ -289,6 +293,16 @@ class Conv2DTransposeTest(test.TestCase): self.assertAllClose(cache_values, value) + def testConv2DTransposeShapeInference(self): + # Test case for 8972 + initializer = random_ops.truncated_normal( + [3, 3, 5, 1], mean=0.0, stddev=0.01, dtype=dtypes.float32) + x = variables.Variable(random_ops.random_normal([3, 10, 5, 1])) + f = 
variable_scope.get_variable("f", initializer=initializer) + f_shape = array_ops.stack([array_ops.shape(x)[0], 10, 5, 5]) + output = nn_ops.conv2d_transpose( + x, f, f_shape, strides=[1, 1, 1, 1], padding="SAME") + self.assertEqual(output.get_shape().as_list(), [None, 10, 5, 5]) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/decode_csv_op_test.py b/tensorflow/python/kernel_tests/decode_csv_op_test.py index 3853379328..7d9e57c8e5 100644 --- a/tensorflow/python/kernel_tests/decode_csv_op_test.py +++ b/tensorflow/python/kernel_tests/decode_csv_op_test.py @@ -116,6 +116,17 @@ class DecodeCSVOpTest(test.TestCase): self._test(args, expected_out) + def testNA(self): + args = { + "records": ["2.0,NA,aa", "NA,5,bb", "3,6,NA"], + "record_defaults": [[0.0], [0], [""]], + "na_value": "NA" + } + + expected_out = [[2.0, 0.0, 3], [0, 5, 6], [b"aa", b"bb", b""]] + + self._test(args, expected_out) + def testWithDefaults(self): args = { "records": [",1,", "0.2,3,bcd", "3.0,,"], diff --git a/tensorflow/python/kernel_tests/summary_tensor_op_test.py b/tensorflow/python/kernel_tests/summary_tensor_op_test.py index 3584637865..d534aadb79 100644 --- a/tensorflow/python/kernel_tests/summary_tensor_op_test.py +++ b/tensorflow/python/kernel_tests/summary_tensor_op_test.py @@ -154,7 +154,7 @@ class SummaryOpsTest(test.TestCase): self.assertEqual(descr.display_name, "my name") self.assertEqual(descr.summary_description, "my description") - # If both SummmaryMetadata and explicit args are provided, the args win + # If both SummaryMetadata and explicit args are provided, the args win overwrite = summary_ops.tensor_summary( "simple", const, diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt index 6e7122db5e..d27e867583 100644 --- a/tensorflow/python/ops/hidden_ops.txt +++ b/tensorflow/python/ops/hidden_ops.txt @@ -207,6 +207,7 @@ TextLineReaderV2 TFRecordReaderV2 WholeFileReaderV2 LMDBReader +DecodeCSV # linalg_ops 
BatchCholesky diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py index c5fd15bae4..ea7132791c 100644 --- a/tensorflow/python/ops/parsing_ops.py +++ b/tensorflow/python/ops/parsing_ops.py @@ -1166,3 +1166,42 @@ def _parse_single_sequence_example_raw(serialized, feature_list_sparse_tensors + feature_list_dense_values)) return (context_output, feature_list_output) + + +# Swap `name` and `na_value` for backward compatibility. +def decode_csv(records, record_defaults, field_delim=",", + use_quote_delim=True, name=None, na_value=""): + # pylint: disable=protected-access + """Convert CSV records to tensors. Each column maps to one tensor. + + RFC 4180 format is expected for the CSV records. + (https://tools.ietf.org/html/rfc4180) + Note that we allow leading and trailing spaces with int or float field. + + Args: + records: A `Tensor` of type `string`. + Each string is a record/row in the csv and all records should have + the same format. + record_defaults: A list of `Tensor` objects with specific types. + Acceptable types are `float32`, `int32`, `int64`, `string`. + One tensor per column of the input record, with either a + scalar default value for that column or empty if the column is required. + field_delim: An optional `string`. Defaults to `","`. + char delimiter to separate fields in a record. + use_quote_delim: An optional `bool`. Defaults to `True`. + If false, treats double quotation marks as regular + characters inside of the string fields (ignoring RFC 4180, Section 2, + Bullet 5). + name: A name for the operation (optional). + na_value: Additional string to recognize as NA/NaN. + + Returns: + A list of `Tensor` objects. Has the same type as `record_defaults`. + Each tensor will have the same shape as records. + """ + # TODO(martinwicke), remove the wrapper when new Python API generator is done. 
+ return gen_parsing_ops._decode_csv( + records=records, record_defaults=record_defaults, + field_delim=field_delim, use_quote_delim=use_quote_delim, + na_value=na_value, name=name) + # pylint: enable=protected-access diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index bf8380ebbd..0a1a748c40 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -562,7 +562,7 @@ static bool TensorOpMathEnabled() { bool ret; TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DISABLE_TENSOR_OP_MATH", /*default=*/false, &ret)); - return ret; + return !ret; }(); return is_enabled; } @@ -2474,58 +2474,73 @@ struct WinogradNonfused { }; bool CudnnSupport::GetConvolveAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms) { - out_algorithms->assign({ - // clang-format off - CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, - CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, - CUDNN_CONVOLUTION_FWD_ALGO_GEMM, - CUDNN_CONVOLUTION_FWD_ALGO_DIRECT, - CUDNN_CONVOLUTION_FWD_ALGO_FFT, + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms) { + std::vector algo_types = { + // clang-format off + CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, + CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM, + CUDNN_CONVOLUTION_FWD_ALGO_GEMM, + CUDNN_CONVOLUTION_FWD_ALGO_DIRECT, + CUDNN_CONVOLUTION_FWD_ALGO_FFT, #if CUDNN_VERSION >= 5000 - CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD, + CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD, #endif - // clang-format on - }); + // clang-format on + }; if (CudnnEnvVar::IsEnabled()) { - out_algorithms->push_back(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING); + algo_types.push_back(CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING); } #if CUDNN_VERSION >= 5100 if (CudnnEnvVar::IsEnabled() && with_winograd_nonfused) { - out_algorithms->push_back(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED); + algo_types.push_back(CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED); } #endif 
+ + out_algorithms->clear(); + for (auto i : algo_types) { + out_algorithms->push_back({i, /*use_tensor_ops=*/false}); + if (cc_major >= 7 && CUDNN_VERSION >= 7000 && TensorOpMathEnabled()) { + out_algorithms->push_back({i, /*use_tensor_ops=*/true}); + } + } return true; } bool CudnnSupport::GetConvolveBackwardDataAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms) { - out_algorithms->assign({ - // clang-format off - CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, - CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, - CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT, - CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms) { + std::vector algo_types = { + // clang-format off + CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING, #if CUDNN_VERSION >= 5000 - CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD, + CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD, #endif - // clang-format on - }); + // clang-format on + }; #if CUDNN_VERSION >= 5100 if (CudnnEnvVar::IsEnabled() && with_winograd_nonfused) { - out_algorithms->push_back( - CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED); + algo_types.push_back(CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED); } #endif + + out_algorithms->clear(); + for (auto i : algo_types) { + out_algorithms->push_back({i, /*use_tensor_ops=*/false}); + if (cc_major >= 7 && CUDNN_VERSION >= 7000 && TensorOpMathEnabled()) { + out_algorithms->push_back({i, /*use_tensor_ops=*/true}); + } + } return true; } bool CudnnSupport::GetConvolveBackwardFilterAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms) { - out_algorithms->assign({ + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms) { + std::vector algo_types = { // clang-format off CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, @@ -2534,13 +2549,20 @@ bool 
CudnnSupport::GetConvolveBackwardFilterAlgorithms( // Based on cudnn.h, the following is not implemented. // CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD, // clang-format on - }); + }; #if CUDNN_VERSION >= 5110 if (CudnnEnvVar::IsEnabled() && with_winograd_nonfused) { - out_algorithms->push_back( - CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED); + algo_types.push_back(CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED); } #endif + + out_algorithms->clear(); + for (auto i : algo_types) { + out_algorithms->push_back({i, /*use_tensor_ops=*/false}); + if (cc_major >= 7 && CUDNN_VERSION >= 7000 && TensorOpMathEnabled()) { + out_algorithms->push_back({i, /*use_tensor_ops=*/true}); + } + } return true; } diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index beb2f7d050..8d7069a902 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -145,16 +145,16 @@ class CudnnSupport : public dnn::DnnSupport { ScratchAllocator* workspace_allocator) override; bool GetConvolveAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms) override; + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms) override; bool GetConvolveBackwardDataAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms) override; + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms) override; bool GetConvolveBackwardFilterAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms) override; + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms) override; bool DoBatchNormalizationForward( Stream* stream, const DeviceMemory& x, diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 2c40e18f5c..07fe8a85f4 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -23,20 +23,20 @@ namespace 
gputools { namespace dnn { bool DnnSupport::GetConvolveAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms) { + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms) { return false; } bool DnnSupport::GetConvolveBackwardDataAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms) { + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms) { return false; } bool DnnSupport::GetConvolveBackwardFilterAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms) { + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms) { return false; } diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 5fe523602a..624357b82f 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -1183,8 +1183,8 @@ class DnnSupport { // Return a list of algorithms supported by the forward convolution pass. virtual bool GetConvolveAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms); + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms); // Version of DoConvolve that uses pre-quantized 8 bit coefficients. // coefficient_scales specifies the scaling of each column of coefficients: @@ -1263,8 +1263,8 @@ class DnnSupport { // Return a list of algorithms supported by the backward convolution pass for // data. virtual bool GetConvolveBackwardDataAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms); + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms); virtual bool DoConvolveBackwardData( Stream* stream, const FilterDescriptor& filter_descriptor, @@ -1312,8 +1312,8 @@ class DnnSupport { // Return a list of algorithms supported by the backward convolution pass for // filters. 
virtual bool GetConvolveBackwardFilterAlgorithms( - bool with_winograd_nonfused, - std::vector* out_algorithms); + bool with_winograd_nonfused, int cc_major, int cc_minor, + std::vector* out_algorithms); virtual bool DoConvolveBackwardFilter( Stream* stream, const BatchDescriptor& input_descriptor, diff --git a/tensorflow/stream_executor/platform.h b/tensorflow/stream_executor/platform.h index ed12982e30..f0a0e60e02 100644 --- a/tensorflow/stream_executor/platform.h +++ b/tensorflow/stream_executor/platform.h @@ -96,7 +96,7 @@ class Platform { // each platform is required to expose an ID to ensure unique registration and // as a target against which plugins can register. // - // The macro below is provided to help generate a [process-unique] identifer. + // The macro below is provided to help generate a [process-unique] identifier. using Id = void*; // Helper macro to define a plugin ID. To be used only inside plugin diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index a72ee804c1..21172d5a16 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -70,7 +70,7 @@ class BatchDescriptor; class FilterDescriptor; class ConvolutionDescriptor; class ProfileResult; -struct AlgorithmDesc; +class AlgorithmDesc; } // namespace dnn class StreamExecutor; diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc index 199a908914..9bbfe7f04a 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.cc +++ b/tensorflow/stream_executor/stream_executor_pimpl.cc @@ -286,35 +286,41 @@ bool StreamExecutor::SupportsDnn() const { bool StreamExecutor::GetConvolveAlgorithms( bool with_winograd_nonfused, - std::vector *out_algorithms) { + std::vector *out_algorithms) { dnn::DnnSupport *dnn_support = AsDnn(); if (!dnn_support) { return false; } - return dnn_support->GetConvolveAlgorithms(with_winograd_nonfused, - out_algorithms); + int cc_major, 
cc_minor; + GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor); + return dnn_support->GetConvolveAlgorithms(with_winograd_nonfused, cc_major, + cc_minor, out_algorithms); } bool StreamExecutor::GetConvolveBackwardDataAlgorithms( bool with_winograd_nonfused, - std::vector *out_algorithms) { + std::vector *out_algorithms) { dnn::DnnSupport *dnn_support = AsDnn(); if (!dnn_support) { return false; } - return dnn_support->GetConvolveBackwardDataAlgorithms(with_winograd_nonfused, - out_algorithms); + int cc_major, cc_minor; + GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor); + return dnn_support->GetConvolveBackwardDataAlgorithms( + with_winograd_nonfused, cc_major, cc_minor, out_algorithms); } bool StreamExecutor::GetConvolveBackwardFilterAlgorithms( bool with_winograd_nonfused, - std::vector *out_algorithms) { + std::vector *out_algorithms) { dnn::DnnSupport *dnn_support = AsDnn(); if (!dnn_support) { return false; } + int cc_major, cc_minor; + GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor); return dnn_support->GetConvolveBackwardFilterAlgorithms( - with_winograd_nonfused, out_algorithms); + with_winograd_nonfused, cc_major, cc_minor, out_algorithms); } bool StreamExecutor::GetBlasGemmAlgorithms( diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h index 98136a92a0..f354317a6e 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.h +++ b/tensorflow/stream_executor/stream_executor_pimpl.h @@ -343,20 +343,19 @@ class StreamExecutor { bool SupportsDnn() const; // Get the list of supported algorithms for the forward convolution opeartion. - bool GetConvolveAlgorithms( - bool with_winograd_nonfused, - std::vector *out_algorithms); + bool GetConvolveAlgorithms(bool with_winograd_nonfused, + std::vector *out_algorithms); // Get the list of supported algorithms for the backward convolution on data. 
bool GetConvolveBackwardDataAlgorithms( bool with_winograd_nonfused, - std::vector *out_algorithms); + std::vector *out_algorithms); // Get the list of supported algorithms for the backward convolution on the // filter. bool GetConvolveBackwardFilterAlgorithms( bool with_winograd_nonfused, - std::vector *out_algorithms); + std::vector *out_algorithms); // Get the list of supported algorithms for BLAS gemm. bool GetBlasGemmAlgorithms(std::vector *out_algorithms); diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index a308688790..0f074151db 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -526,6 +526,7 @@ def tf_cc_test(name, extra_copts=[], suffix="", linkopts=[], + nocopts=None, **kwargs): native.cc_test( name="%s%s" % (name, suffix), @@ -547,6 +548,7 @@ def tf_cc_test(name, clean_dep("//tensorflow:darwin"): 1, "//conditions:default": 0, }), + nocopts=nocopts, **kwargs) @@ -649,7 +651,8 @@ def tf_cc_tests(srcs, tags=[], size="medium", args=None, - linkopts=[]): + linkopts=[], + nocopts=None): for src in srcs: tf_cc_test( name=src_to_test_name(src), @@ -659,7 +662,8 @@ def tf_cc_tests(srcs, tags=tags, size=size, args=args, - linkopts=linkopts) + linkopts=linkopts, + nocopts=nocopts) def tf_cc_test_mkl(srcs, @@ -669,7 +673,7 @@ def tf_cc_test_mkl(srcs, tags=[], size="medium", args=None): - if_mkl(tf_cc_tests(srcs, deps, linkstatic, tags=tags, size=size, args=args)) + if_mkl(tf_cc_tests(srcs, deps, name, linkstatic=linkstatic, tags=tags, size=size, args=args, nocopts="-fno-exceptions")) def tf_cc_tests_gpu(srcs, @@ -867,18 +871,33 @@ def tf_mkl_kernel_library(name, deps=None, alwayslink=1, copts=tf_copts(), + nocopts="-fno-exceptions", **kwargs): + """A rule to build MKL-based TensorFlow kernel libraries.""" + gpu_srcs = gpu_srcs # unused argument + kwargs = kwargs # unused argument + + if not bool(srcs): + srcs = [] + if not bool(hdrs): + hdrs = [] + + if prefix: + srcs = srcs + native.glob( + [prefix + "*.cc"]) + hdrs = 
hdrs + native.glob( + [prefix + "*.h"]) + if_mkl( - tf_kernel_library( - name, - prefix=prefix, + native.cc_library( + name=name, srcs=srcs, - gpu_srcs=gpu_srcs, hdrs=hdrs, deps=deps, alwayslink=alwayslink, copts=copts, - **kwargs)) + nocopts=nocopts + )) # Bazel rules for building swig files. diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 32a86e420a..6e03f9e8fb 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -874,7 +874,7 @@ tf_module { } member_method { name: "decode_csv" - argspec: "args=[\'records\', \'record_defaults\', \'field_delim\', \'use_quote_delim\', \'name\'], varargs=None, keywords=None, defaults=[\',\', \'True\', \'None\'], " + argspec: "args=[\'records\', \'record_defaults\', \'field_delim\', \'use_quote_delim\', \'name\', \'na_value\'], varargs=None, keywords=None, defaults=[\',\', \'True\', \'None\', \'\'], " } member_method { name: "decode_json_example" diff --git a/tensorflow/tools/ci_build/install/install_golang.sh b/tensorflow/tools/ci_build/install/install_golang.sh index 88bc2960e3..596265b069 100755 --- a/tensorflow/tools/ci_build/install/install_golang.sh +++ b/tensorflow/tools/ci_build/install/install_golang.sh @@ -16,7 +16,7 @@ set -ex -GOLANG_URL="https://storage.googleapis.com/golang/go1.8.3.linux-amd64.tar.gz" +GOLANG_URL="https://storage.googleapis.com/golang/go1.9.linux-amd64.tar.gz" sudo mkdir -p /usr/local wget -q -O - "${GOLANG_URL}" | sudo tar -C /usr/local -xz diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index f5364d803a..04773376e9 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -78,10 +78,12 @@ WORKDIR /tensorflow # Configure the build for our CUDA configuration. 
ENV CI_BUILD_PYTHON python -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/lib64/stubs:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1 +RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 + RUN tensorflow/tools/ci_build/builds/configured GPU \ bazel build -c opt --config=cuda --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ tensorflow/tools/pip_package:build_pip_package && \ diff --git a/tensorflow/tools/docker/jupyter_notebook_config.py b/tensorflow/tools/docker/jupyter_notebook_config.py index 747beb8251..0acbf6fcee 100644 --- a/tensorflow/tools/docker/jupyter_notebook_config.py +++ b/tensorflow/tools/docker/jupyter_notebook_config.py @@ -18,7 +18,6 @@ from IPython.lib import passwd c.NotebookApp.ip = '*' c.NotebookApp.port = int(os.getenv('PORT', 8888)) c.NotebookApp.open_browser = False -c.MultiKernelManager.default_kernel_name = 'python2' # sets a password if PASSWORD is set in the environment if 'PASSWORD' in os.environ: diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index ca3b778c29..1015103077 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -923,7 +923,7 @@ class _ClassPageInfo(object): """Sets the `aliases` list. Args: - aliases: A list of strings. Containing all the obejct's full names. + aliases: A list of strings. Containing all the object's full names. """ assert self.aliases is None self._aliases = aliases @@ -1438,7 +1438,7 @@ class _PythonBuiltin(object): class _PythonFile(object): """This class indicates that the object is defined in a regular python file. - This can be used for the `defined_in` slot of the `PageInfo` obejcts. + This can be used for the `defined_in` slot of the `PageInfo` objects. 
""" def __init__(self, path, parser_config): diff --git a/tensorflow/tools/proto_text/gen_proto_text_functions_lib_test.cc b/tensorflow/tools/proto_text/gen_proto_text_functions_lib_test.cc index 81f85e0009..6f0b4f47de 100644 --- a/tensorflow/tools/proto_text/gen_proto_text_functions_lib_test.cc +++ b/tensorflow/tools/proto_text/gen_proto_text_functions_lib_test.cc @@ -93,13 +93,15 @@ TEST(CreateProtoDebugStringLibTest, ValidSimpleTypes) { proto.set_optional_int64(std::numeric_limits::max()); proto.set_optional_uint32(std::numeric_limits::max()); proto.set_optional_uint64(std::numeric_limits::max()); - proto.set_optional_float(std::numeric_limits::max()); + // TODO(b/67475677): Re-enable after resolving float precision issue + // proto.set_optional_float(std::numeric_limits::max()); proto.set_optional_double(std::numeric_limits::max()); EXPECT_TEXT_TRANSFORMS_MATCH(); // Least positive numeric values. proto.Clear(); - proto.set_optional_float(std::numeric_limits::min()); + // TODO(b/67475677): Re-enable after resolving float precision issue + // proto.set_optional_float(std::numeric_limits::min()); proto.set_optional_double(std::numeric_limits::min()); EXPECT_TEXT_TRANSFORMS_MATCH(); @@ -107,7 +109,8 @@ TEST(CreateProtoDebugStringLibTest, ValidSimpleTypes) { proto.Clear(); proto.set_optional_int32(std::numeric_limits::lowest()); proto.set_optional_int64(std::numeric_limits::lowest()); - proto.set_optional_float(std::numeric_limits::lowest()); + // TODO(b/67475677): Re-enable after resolving float precision issue + // proto.set_optional_float(std::numeric_limits::lowest()); proto.set_optional_double(std::numeric_limits::lowest()); EXPECT_TEXT_TRANSFORMS_MATCH(); diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b226184261..de0084613b 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -170,6 +170,17 @@ def tf_workspace(path_prefix="", tf_repo_name=""): print("path_prefix was specified to tf_workspace but is no longer used " + 
"and will be removed in the future.") + native.new_http_archive( + name = "mkl_dnn", + urls = [ + "https://github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz", + "http://mirror.bazel.build/github.com/01org/mkl-dnn/archive/b01e3a55a07be62172e713bcd2644c5176360212.tar.gz", + ], + sha256 = "0d529ad4c49dc799e6df07c2b88b115d0668735da15fb3b3862d28d33fa68165", + strip_prefix = "mkl-dnn-b01e3a55a07be62172e713bcd2644c5176360212", + build_file = str(Label("//third_party/mkl_dnn:mkldnn.BUILD")), + ) + native.new_http_archive( name = "eigen_archive", urls = [ @@ -373,10 +384,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""): patched_http_archive( name = "protobuf_archive", urls = [ - "http://mirror.bazel.build/github.com/google/protobuf/archive/0b059a3d8a8f8aa40dde7bea55edca4ec5dfea66.tar.gz", + "http://mirror.bazel.build/github.com/google/protobuf/archive/b04e5cba356212e4e8c66c61bbe0c3a20537c5b9.tar.gz", ], - sha256 = "6d43b9d223ce09e5d4ce8b0060cb8a7513577a35a64c7e3dad10f0703bf3ad93", - strip_prefix = "protobuf-0b059a3d8a8f8aa40dde7bea55edca4ec5dfea66", + sha256 = "e178a25c52efcb6b05988bdbeace4c0d3f2d2fe5b46696d1d9898875c3803d6a", + strip_prefix = "protobuf-b04e5cba356212e4e8c66c61bbe0c3a20537c5b9", # TODO: remove patching when tensorflow stops linking same protos into # multiple shared libraries loaded in runtime by python. 
# This patch fixes a runtime crash when tensorflow is compiled diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index baa6e01bca..31a4bfabf6 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -117,7 +117,7 @@ def get_cxx_inc_directories(repository_ctx, cc): includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True) includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False) - includes_cpp_set = set(includes_cpp) + includes_cpp_set = depset(includes_cpp) return includes_cpp + [inc for inc in includes_c if inc not in includes_cpp_set] diff --git a/third_party/mkl_dnn/BUILD b/third_party/mkl_dnn/BUILD new file mode 100644 index 0000000000..5b01f6e3e4 --- /dev/null +++ b/third_party/mkl_dnn/BUILD @@ -0,0 +1 @@ +licenses(["notice"]) diff --git a/third_party/mkl_dnn/mkldnn.BUILD b/third_party/mkl_dnn/mkldnn.BUILD new file mode 100644 index 0000000000..58bb7a6a5d --- /dev/null +++ b/third_party/mkl_dnn/mkldnn.BUILD @@ -0,0 +1,25 @@ +exports_files(["LICENSE"]) + +cc_library( + name = "mkl_dnn", + srcs = glob([ + "src/common/*.cpp", + "src/cpu/*.cpp", + ]), + hdrs = glob(["include/*"]), + copts = ["-fexceptions"] + select({ + "@org_tensorflow//tensorflow:linux_x86_64": [ + "-fopenmp", + ], + "//conditions:default": [], + }), + includes = [ + "include", + "src", + "src/common", + "src/cpu", + "src/cpu/xbyak", + ], + nocopts = "-fno-exceptions", + visibility = ["//visibility:public"], +) -- GitLab From aa20fc1aea6d3fdf4e0ba821e8e4ef5c08cfd282 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 6 Oct 2017 12:12:10 -0700 Subject: [PATCH 128/909] [XLA:CPU] Rename GetIrArrayForOp to GetIrArrayFor. This makes it consistent with the other similar functions in IrEmitter. 
PiperOrigin-RevId: 171325815 --- .../compiler/xla/service/cpu/ir_emitter.cc | 78 +++++++++---------- .../compiler/xla/service/cpu/ir_emitter.h | 6 +- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index e4fb7c0496..ec9a69709d 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -395,7 +395,7 @@ Status IrEmitter::HandleSelect(HloInstruction* select, HloInstruction* pred, if (ShapeUtil::IsTuple(select->shape())) { TF_RETURN_IF_ERROR(EmitTargetAddressForOp(select)); - llvm_ir::EmitTupleSelect(GetIrArrayForOp(select), GetIrArrayForOp(pred), + llvm_ir::EmitTupleSelect(GetIrArrayFor(select), GetIrArrayFor(pred), GetEmittedValueFor(on_true), GetEmittedValueFor(on_false), &ir_builder_); return Status::OK(); @@ -412,7 +412,7 @@ Status IrEmitter::HandleInfeed(HloInstruction* infeed) { // The infeed operation produces data (dequeued from the infeed queue) at this // address, which has been provided by buffer assignment. 
TF_RETURN_IF_ERROR(EmitTargetAddressForOp(infeed)); - llvm_ir::IrArray infeed_array = GetIrArrayForOp(infeed); + llvm_ir::IrArray infeed_array = GetIrArrayFor(infeed); if (ShapeUtil::IsTuple(shape)) { TF_RET_CHECK(!ShapeUtil::IsNestedTuple(shape)); @@ -566,7 +566,7 @@ Status IrEmitter::HandleTuple( for (auto operand : operands) { base_ptrs.push_back(GetEmittedValueFor(operand)); } - llvm_ir::EmitTuple(GetIrArrayForOp(tuple), base_ptrs, &ir_builder_); + llvm_ir::EmitTuple(GetIrArrayFor(tuple), base_ptrs, &ir_builder_); return Status::OK(); } @@ -581,7 +581,7 @@ Status IrEmitter::HandleMap( const llvm_ir::IrArray::Index& index) { std::vector parameter_addresses; for (const HloInstruction* operand : operands) { - const llvm_ir::IrArray& array = GetIrArrayForOp(operand); + const llvm_ir::IrArray& array = GetIrArrayFor(operand); parameter_addresses.push_back( array.EmitArrayElementAddress(index, &ir_builder_)); } @@ -677,7 +677,7 @@ Status IrEmitter::HandleReduceWindow(HloInstruction* reduce_window, SetToFirstInsertPoint(if_data.true_block, &ir_builder_); // We are not in the padding, so carry out the computation. 
- llvm_ir::IrArray input_array(GetIrArrayForOp(operand)); + llvm_ir::IrArray input_array(GetIrArrayFor(operand)); llvm::Value* input_value_address = input_array.EmitArrayElementAddress(input_index, &ir_builder_); llvm::Value* result = EmitElementFunctionCall( @@ -814,7 +814,7 @@ Status IrEmitter::HandleSelectAndScatter(HloInstruction* select_and_scatter) { ir_builder_.CreateStore(operand_index[i], selected_index_address_slot); } }; - llvm_ir::IrArray operand_array(GetIrArrayForOp(operand)); + llvm_ir::IrArray operand_array(GetIrArrayFor(operand)); llvm::Value* operand_data = operand_array.EmitReadArrayElement(operand_index, &ir_builder_); ir_builder_.CreateStore(operand_data, selected_value_address); @@ -857,10 +857,10 @@ Status IrEmitter::HandleSelectAndScatter(HloInstruction* select_and_scatter) { selected_index.push_back( ir_builder_.CreateLoad(selected_index_address_slot)); } - llvm_ir::IrArray source_array(GetIrArrayForOp(source)); + llvm_ir::IrArray source_array(GetIrArrayFor(source)); llvm::Value* source_value_address = source_array.EmitArrayElementAddress(source_index, &ir_builder_); - llvm_ir::IrArray output_array(GetIrArrayForOp(select_and_scatter)); + llvm_ir::IrArray output_array(GetIrArrayFor(select_and_scatter)); llvm::Value* output_value_address = output_array.EmitArrayElementAddress(selected_index, &ir_builder_); llvm::Value* scatter_value = EmitElementFunctionCall( @@ -880,11 +880,11 @@ Status IrEmitter::HandleDot(HloInstruction* dot, HloInstruction* lhs, /*instruction=*/*dot, /*operands=*/{lhs, rhs}, /*supported_types=*/{F32, F64})); - llvm_ir::IrArray lhs_array(GetIrArrayForOp(lhs)); - llvm_ir::IrArray rhs_array(GetIrArrayForOp(rhs)); + llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs)); + llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs)); TF_RETURN_IF_ERROR(EmitTargetAddressForOp(dot)); - llvm_ir::IrArray target_array = GetIrArrayForOp(dot); + llvm_ir::IrArray target_array = GetIrArrayFor(dot); VLOG(2) << "HandleDot: "; VLOG(2) << " lhs operand: " @@ 
-1163,7 +1163,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution, input_index[dnums.feature_dimension()] = input_feature; input_index[dnums.batch_dimension()] = batch; - llvm_ir::IrArray kernel_array(GetIrArrayForOp(rhs)); + llvm_ir::IrArray kernel_array(GetIrArrayFor(rhs)); llvm_ir::IrArray::Index kernel_index(num_dims); for (int i = 0; i < num_spatial_dims; ++i) { kernel_index[dnums.kernel_spatial_dimensions(i)] = kernel_spatial[i]; @@ -1171,7 +1171,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution, kernel_index[dnums.kernel_input_feature_dimension()] = input_feature; kernel_index[dnums.kernel_output_feature_dimension()] = output_feature; - llvm_ir::IrArray input_array(GetIrArrayForOp(lhs)); + llvm_ir::IrArray input_array(GetIrArrayFor(lhs)); llvm::Value* product = ir_builder_.CreateFMul( input_array.EmitReadArrayElement(input_index, &ir_builder_), kernel_array.EmitReadArrayElement(kernel_index, &ir_builder_)); @@ -1305,7 +1305,7 @@ Status IrEmitter::HandleBatchNormTraining(HloInstruction* batch_norm_training) { SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), &ir_builder_); - llvm_ir::IrArray operand_array(GetIrArrayForOp(operand)); + llvm_ir::IrArray operand_array(GetIrArrayFor(operand)); llvm_ir::IrArray::Index input_index = FillReducedDimensionIndex(reduced_dims_index, index); llvm::Value* new_value = @@ -1379,7 +1379,7 @@ Status IrEmitter::HandleBatchNormTraining(HloInstruction* batch_norm_training) { llvm::Value* var = var_array.EmitReadArrayElement( feature_index_value, &ir_builder_); - llvm_ir::IrArray operand_array(GetIrArrayForOp(operand)); + llvm_ir::IrArray operand_array(GetIrArrayFor(operand)); llvm::Value* input = operand_array.EmitReadArrayElement(index, &ir_builder_); @@ -1391,10 +1391,10 @@ Status IrEmitter::HandleBatchNormTraining(HloInstruction* batch_norm_training) { ir_builder_.CreateCall(func_llvm_sqrt, {variance_with_epsilon}); llvm::Value* normalized = ir_builder_.CreateFDiv( 
ir_builder_.CreateFSub(input, mean), variance_sqrt); - llvm_ir::IrArray offset_array(GetIrArrayForOp(offset)); + llvm_ir::IrArray offset_array(GetIrArrayFor(offset)); llvm::Value* offset = offset_array.EmitReadArrayElement( feature_index_value, &ir_builder_); - llvm_ir::IrArray scale_array(GetIrArrayForOp(scale)); + llvm_ir::IrArray scale_array(GetIrArrayFor(scale)); llvm::Value* scale = scale_array.EmitReadArrayElement( feature_index_value, &ir_builder_); llvm::Value* result = ir_builder_.CreateFAdd( @@ -1405,7 +1405,7 @@ Status IrEmitter::HandleBatchNormTraining(HloInstruction* batch_norm_training) { target_array, &ir_builder_) .EmitLoop(IrName(batch_norm_training, "normalize"))); - llvm_ir::EmitTuple(GetIrArrayForOp(batch_norm_training), + llvm_ir::EmitTuple(GetIrArrayFor(batch_norm_training), {normalized, mean, var}, &ir_builder_); return Status::OK(); } @@ -1653,7 +1653,7 @@ IrEmitter::EmitInnerLoopForVectorizedReduction( SetToFirstInsertPoint(reduction_loop_nest.GetInnerLoopBodyBasicBlock(), &ir_builder_); - llvm_ir::IrArray arg_array(GetIrArrayForOp(arg)); + llvm_ir::IrArray arg_array(GetIrArrayFor(arg)); llvm_ir::IrArray::Index input_index = reduced_dims_index; llvm_ir::IrArray::Index::const_iterator it = output_index.begin(); @@ -1829,7 +1829,7 @@ StatusOr IrEmitter::EmitVectorizedReduce( reduction_generator, array_index, vector_type, init_value, arg, dimensions, element_alignment)); - llvm_ir::IrArray target_array = GetIrArrayForOp(reduce); + llvm_ir::IrArray target_array = GetIrArrayFor(reduce); llvm::Value* output_address = target_array.EmitArrayElementAddress(array_index, &ir_builder_); EmitShardedVectorStore(output_address, accumulator, element_alignment, @@ -1861,7 +1861,7 @@ StatusOr IrEmitter::EmitVectorizedReduce( reduction_generator, array_index, vector_type, init_value, arg, dimensions, element_alignment)); - llvm_ir::IrArray target_array = GetIrArrayForOp(reduce); + llvm_ir::IrArray target_array = GetIrArrayFor(reduce); llvm::Value* 
output_address = target_array.EmitArrayElementAddress(array_index, &ir_builder_); EmitShardedVectorStore(output_address, accumulator, element_alignment, @@ -1928,7 +1928,7 @@ Status IrEmitter::HandleReduce(HloInstruction* reduce, HloInstruction* arg, // filled in. We fill in the rest of the dimensions with induction // Value*s taken from 'index' which iterates over the target array. // See the high-level description in the XLA documentation for details. - llvm_ir::IrArray arg_array(GetIrArrayForOp(arg)); + llvm_ir::IrArray arg_array(GetIrArrayFor(arg)); llvm_ir::IrArray::Index input_index = reduced_dims_index; llvm_ir::IrArray::Index::const_iterator it = index.begin(); @@ -2043,7 +2043,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice, HloInstruction* operand) { outer_dims.push_back(memcpy_dim); } - llvm_ir::IrArray target_array = GetIrArrayForOp(slice); + llvm_ir::IrArray target_array = GetIrArrayFor(slice); const int64 num_outer_loops = outer_dims.size(); llvm_ir::ForLoopNest loops(IrName(slice), &ir_builder_); @@ -2061,7 +2061,7 @@ Status IrEmitter::HandleSlice(HloInstruction* slice, HloInstruction* operand) { SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), &ir_builder_); } - llvm_ir::IrArray source_array = GetIrArrayForOp(operand); + llvm_ir::IrArray source_array = GetIrArrayFor(operand); const llvm_ir::IrArray::Index source_index = target_index.SourceIndexOfSlice( /*shape=*/slice->shape(), /*starts=*/slice->slice_starts(), /*strides=*/slice->slice_strides(), /*builder=*/&ir_builder_); @@ -2166,7 +2166,7 @@ Status IrEmitter::HandleDynamicUpdateSlice(HloInstruction* dynamic_update_slice, llvm_ir::IrArray::Index start_index(rank); for (int64 i = 0; i < rank; ++i) { llvm_ir::IrArray::Index dim_index({ir_builder_.getInt64(i)}); - llvm_ir::IrArray start_indices_array(GetIrArrayForOp(start_indices)); + llvm_ir::IrArray start_indices_array(GetIrArrayFor(start_indices)); start_index[i] = start_indices_array.EmitReadArrayElement(dim_index, 
&ir_builder_); } @@ -2192,13 +2192,13 @@ Status IrEmitter::HandleDynamicUpdateSlice(HloInstruction* dynamic_update_slice, } // Read value from 'update'. - llvm_ir::IrArray update_array(GetIrArrayForOp(update)); + llvm_ir::IrArray update_array(GetIrArrayFor(update)); llvm::Value* update_data = update_array.EmitReadArrayElement(index, &ir_builder_); // Write value to output array. - GetIrArrayForOp(operand).EmitWriteArrayElement(output_index, update_data, - &ir_builder_); + GetIrArrayFor(operand).EmitWriteArrayElement(output_index, update_data, + &ir_builder_); return Status::OK(); }; @@ -2249,7 +2249,7 @@ Status IrEmitter::HandlePad(HloInstruction* pad) { SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), &ir_builder_); // Load an element from the operand. - llvm_ir::IrArray operand_array(GetIrArrayForOp(operand)); + llvm_ir::IrArray operand_array(GetIrArrayFor(operand)); llvm::Value* operand_data = operand_array.EmitReadArrayElement(operand_index, &ir_builder_); @@ -2269,7 +2269,7 @@ Status IrEmitter::HandlePad(HloInstruction* pad) { } // Store the operand element to the computed output location. 
- llvm_ir::IrArray output_array(GetIrArrayForOp(pad)); + llvm_ir::IrArray output_array(GetIrArrayFor(pad)); output_array.EmitWriteArrayElement(output_index, operand_data, &ir_builder_); SetToFirstInsertPoint(loops.GetOuterLoopExitBasicBlock(), &ir_builder_); @@ -2301,12 +2301,12 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { /*instruction=*/*dot, /*operands=*/{lhs, rhs}, /*supported_types=*/{F32})); - llvm_ir::IrArray lhs_array(GetIrArrayForOp(lhs)); - llvm_ir::IrArray rhs_array(GetIrArrayForOp(rhs)); + llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs)); + llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs)); Shape target_shape = fusion->shape(); TF_RETURN_IF_ERROR(EmitTargetAddressForOp(fusion)); - llvm_ir::IrArray target_array = GetIrArrayForOp(fusion); + llvm_ir::IrArray target_array = GetIrArrayFor(fusion); VLOG(2) << "HandleFusion kTransposeDot: "; VLOG(2) << " lhs operand: " << llvm_ir::DumpToString(*lhs_array.GetBasePointer()); @@ -2324,7 +2324,7 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { } else if (fusion->fusion_kind() == HloInstruction::FusionKind::kLoop) { std::vector parameter_arrays; for (HloInstruction* operand : fusion->operands()) { - parameter_arrays.push_back(GetIrArrayForOp(operand)); + parameter_arrays.push_back(GetIrArrayFor(operand)); } CpuElementalIrEmitter elemental_emitter(hlo_module_config_, this, module_); FusedIrEmitter fused_emitter(parameter_arrays, &elemental_emitter); @@ -2527,7 +2527,7 @@ StatusOr IrEmitter::EmitFastConcatenate( llvm::Type* i8_type = ir_builder_.getInt8Ty(); TF_RETURN_IF_ERROR(EmitTargetAddressForOp(concatenate)); - llvm_ir::IrArray target_array = GetIrArrayForOp(concatenate); + llvm_ir::IrArray target_array = GetIrArrayFor(concatenate); llvm_ir::ForLoopNest loops(IrName(concatenate), &ir_builder_); llvm_ir::IrArray::Index outer_dims_index = @@ -2562,7 +2562,7 @@ StatusOr IrEmitter::EmitFastConcatenate( // equal to the product of inner dimensions. 
for (HloInstruction* operand : operands) { const Shape& input_shape = operand->shape(); - llvm_ir::IrArray source_array = GetIrArrayForOp(operand); + llvm_ir::IrArray source_array = GetIrArrayFor(operand); llvm::Value* copy_source_address = ir_builder_.CreateBitCast( source_array.EmitArrayElementAddress(outer_dims_index, &ir_builder_, "src_addr"), @@ -2785,7 +2785,7 @@ Status IrEmitter::Postprocess(HloInstruction* hlo) { return Status::OK(); } -llvm_ir::IrArray IrEmitter::GetIrArrayForOp(const HloInstruction* hlo) { +llvm_ir::IrArray IrEmitter::GetIrArrayFor(const HloInstruction* hlo) { llvm::Value* value_for_op = GetEmittedValueFor(hlo); llvm_ir::IrArray array(value_for_op, hlo->shape()); @@ -2995,7 +2995,7 @@ Status IrEmitter::EmitTargetElementLoop( const Shape& target_shape = target_op->shape(); TF_RETURN_IF_ERROR(EmitTargetAddressForOp(target_op)); - llvm_ir::IrArray target_array = GetIrArrayForOp(target_op); + llvm_ir::IrArray target_array = GetIrArrayFor(target_op); if (target_op->IsMultiOutputFusion()) { // For multiple outputs fusion, we need to emit each operand and the root. 
@@ -3121,7 +3121,7 @@ Status IrEmitter::DefaultAction(HloInstruction* hlo) { ElementalIrEmitter::HloToElementGeneratorMap operand_to_generator; for (const HloInstruction* operand : hlo->operands()) { operand_to_generator[operand] = [=](const llvm_ir::IrArray::Index& index) { - return GetIrArrayForOp(operand).EmitReadArrayElement(index, &ir_builder_); + return GetIrArrayFor(operand).EmitReadArrayElement(index, &ir_builder_); }; } CpuElementalIrEmitter elemental_emitter(hlo_module_config_, this, module_); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index fd9ee71799..b15026b6da 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -220,8 +220,8 @@ class IrEmitter : public DfsHloVisitorWithDefault { // Gets the IR Value emitted previously for the given hlo. // - // Prefer calling GetIrArrayForOp if the value you're reading is a buffer, - // because GetIrArrayForOp annotates buffer's loads/stores with noalias + // Prefer calling GetIrArrayFor if the value you're reading is a buffer, + // because GetIrArrayFor annotates buffer's loads/stores with noalias // metadata. // // Make sure to call this only when you're certain a value *was* emitted - if @@ -229,7 +229,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { llvm::Value* GetEmittedValueFor(const HloInstruction* hlo); // Gets an IrArray representing the given hlo. - llvm_ir::IrArray GetIrArrayForOp(const HloInstruction* hlo); + llvm_ir::IrArray GetIrArrayFor(const HloInstruction* hlo); // Augments IrArray with aliasing information. 
void AddAliasingInformationToIrArray(const HloInstruction& hlo, -- GitLab From e35372fe3e8a5de4a90a42cdd5a62c5e0fe452ff Mon Sep 17 00:00:00 2001 From: Jeff Carpenter Date: Fri, 6 Oct 2017 12:20:05 -0700 Subject: [PATCH 129/909] Fix unevaluated link in "Reading data" docs --- tensorflow/docs_src/api_guides/python/reading_data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/api_guides/python/reading_data.md b/tensorflow/docs_src/api_guides/python/reading_data.md index 8b6196ea34..e7fb05f9b5 100644 --- a/tensorflow/docs_src/api_guides/python/reading_data.md +++ b/tensorflow/docs_src/api_guides/python/reading_data.md @@ -58,7 +58,7 @@ A typical pipeline for reading records from files has the following stages: 8. Example queue Note: This section discusses implementing input pipelines using the -queue-based APIs which can be cleanly replaced by the ${$datasets$Dataset API}. +queue-based APIs which can be cleanly replaced by the @{$datasets$Datasets API}. ### Filenames, shuffling, and epoch limits -- GitLab From 1d3d4ed02feca370e9009193946cd7efb458b7b6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 12:26:42 -0700 Subject: [PATCH 130/909] Update ops-related pbtxt files. 
PiperOrigin-RevId: 171327794 --- .../core/ops/compat/ops_history.v1.pbtxt | 50 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 10 +++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index a3321c26f3..f8667177cc 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -6831,6 +6831,56 @@ op { } } } +op { + name: "DecodeCSV" + input_arg { + name: "records" + type: DT_STRING + } + input_arg { + name: "record_defaults" + type_list_attr: "OUT_TYPE" + } + output_arg { + name: "output" + type_list_attr: "OUT_TYPE" + } + attr { + name: "OUT_TYPE" + type: "list(type)" + has_minimum: true + minimum: 1 + allowed_values { + list { + type: DT_FLOAT + type: DT_INT32 + type: DT_INT64 + type: DT_STRING + } + } + } + attr { + name: "field_delim" + type: "string" + default_value { + s: "," + } + } + attr { + name: "use_quote_delim" + type: "bool" + default_value { + b: true + } + } + attr { + name: "na_value" + type: "string" + default_value { + s: "" + } + } +} op { name: "DecodeGif" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 429000a058..9cda34a8c8 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -6219,6 +6219,14 @@ op { } description: "If false, treats double quotation marks as regular\ncharacters inside of the string fields (ignoring RFC 4180, Section 2,\nBullet 5)." } + attr { + name: "na_value" + type: "string" + default_value { + s: "" + } + description: "Additional string to recognize as NA/NaN." + } summary: "Convert CSV records to tensors. Each column maps to one tensor." description: "RFC 4180 format is expected for the CSV records.\n(https://tools.ietf.org/html/rfc4180)\nNote that we allow leading and trailing spaces with int or float field." 
} @@ -6505,7 +6513,7 @@ op { } input_arg { name: "row_shape" - description: "A vector representing the dense shape of each row in the produced\nSparseTensor." + description: "A vector representing the dense shape of each row in the produced\nSparseTensor. The shape may be partially specified, using `-1` to indicate\nthat a particular dimension should use the maximum size of all batch elements." type: DT_INT64 } output_arg { -- GitLab From 958a321b0e7a9e5ba07b536024c41615188b547d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 12:33:19 -0700 Subject: [PATCH 131/909] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 171328576 --- tensorflow/go/op/wrappers.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 29c69b3c59..f2ee710a9e 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -5720,7 +5720,8 @@ func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source // batch_size: A scalar representing the number of elements to accumulate in a // batch. // row_shape: A vector representing the dense shape of each row in the produced -// SparseTensor. +// SparseTensor. The shape may be partially specified, using `-1` to indicate +// that a particular dimension should use the maximum size of all batch elements. // // func DenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { @@ -9313,6 +9314,16 @@ func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { } } +// DecodeCSVNaValue sets the optional na_value attribute to value. +// +// value: Additional string to recognize as NA/NaN. 
+// If not specified, defaults to "" +func DecodeCSVNaValue(value string) DecodeCSVAttr { + return func(m optionalAttr) { + m["na_value"] = value + } +} + // Convert CSV records to tensors. Each column maps to one tensor. // // RFC 4180 format is expected for the CSV records. -- GitLab From e2e57bd0bb122abec220bcb399ebeaefdb61e5b2 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 6 Oct 2017 12:52:31 -0700 Subject: [PATCH 132/909] [XLA:LLVM] Remove SetTbaaForInstruction. This was made a nop some time ago because it was broken; this patch removes it entirely. I don't think we can sensibly use HLO types for alias analysis -- a buffer may store values of different HLO types over its lifetime. This isn't an indictment against LLVM TBAA in general; we may be able to use it for something other than AA based on HLO types. PiperOrigin-RevId: 171330686 --- tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 5 ----- tensorflow/compiler/xla/service/layout_assignment.cc | 2 -- tensorflow/compiler/xla/service/llvm_ir/ir_array.cc | 4 ---- tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc | 7 ------- tensorflow/compiler/xla/service/llvm_ir/llvm_util.h | 6 ------ tensorflow/compiler/xla/service/llvm_ir/ops.cc | 1 - 6 files changed, 25 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index ec9a69709d..85f790a717 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1447,9 +1447,6 @@ Status IrEmitter::HandleParameter(HloInstruction* parameter) { param_address_untyped, IrShapeType(param_shape)->getPointerTo()); emitted_value_[parameter] = param_address_typed; - // Parameters of different types may not alias one another. 
- llvm_ir::SetTbaaForInstruction(param_address_untyped, param_shape, - /*is_pointer_to=*/true); if (!ShapeUtil::IsOpaque(param_shape)) { AttachAlignmentMetadataForLoad(param_address_untyped, param_shape); AttachDereferenceableMetadataForLoad(param_address_untyped, param_shape); @@ -2867,8 +2864,6 @@ llvm::Value* IrEmitter::EmitTempBufferPointer( llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(tempbuf_address_base->getContext(), /*MDs=*/{})); } - llvm_ir::SetTbaaForInstruction(tempbuf_address_base, target_shape, - /*is_pointer_to=*/true); AttachAlignmentMetadataForLoad(tempbuf_address_base, allocation.size()); AttachDereferenceableMetadataForLoad(tempbuf_address_base, allocation.size()); diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 8fd330fda7..2058706f11 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -1180,8 +1180,6 @@ Status CopyOperandIfLayoutsDiffer(const ShapeLayout& operand_layout, // to match the layout of its corresponding fusion instruction operand. Also, // set the layout of the fused root to match the layout of the fusion // instruction itself. -// Fused GetTupleElement requires a layout so that TBAA metadata for the tuple -// element array pointer load can be added. 
Status SetFusionLayouts(HloInstruction* fusion) { TF_RET_CHECK(fusion->opcode() == HloOpcode::kFusion); for (auto* fused_instruction : fusion->fused_instructions()) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index e36c791c1a..6a00a565c6 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -268,8 +268,6 @@ llvm::Value* IrArray::EmitReadArrayElement(const Index& index, llvm::Value* element_address = EmitArrayElementAddress(index, ir_builder, name); llvm::LoadInst* load = ir_builder->CreateLoad(element_address); - llvm_ir::SetTbaaForInstruction(load, GetShape(), - /*is_pointer_to=*/false); AnnotateLoadStoreInstructionWithMetadata(load); return load; } @@ -278,8 +276,6 @@ void IrArray::EmitWriteArrayElement(const Index& index, llvm::Value* value, llvm::IRBuilder<>* ir_builder) const { llvm::Value* element_address = EmitArrayElementAddress(index, ir_builder); llvm::StoreInst* store = ir_builder->CreateStore(value, element_address); - llvm_ir::SetTbaaForInstruction(store, GetShape(), - /*is_pointer_to=*/false); AnnotateLoadStoreInstructionWithMetadata(store); } diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 4a7d2b48f7..8e188e7ae8 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -402,13 +402,6 @@ void EmitLogging(const char* tag, llvm::Value* value, {ir_builder->getInt64(tensorflow::bit_cast(tag)), value}); } -void SetTbaaForInstruction(llvm::Instruction* instruction, Shape shape, - bool is_pointer_to) { - // TODO(b/62903316): TBAA metadata causes LLVM to miscompile generated code, - // most likely because the generated metadata is incorrect. Disable TBAA - // metadata while we resolve this. 
-} - void SetAlignmentMetadataForLoad(llvm::LoadInst* load, uint64_t alignment) { llvm::LLVMContext& context = load->getContext(); llvm::Type* int64_ty = llvm::Type::getInt64Ty(context); diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h index 5af62b056e..7a7d14da1e 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h @@ -227,12 +227,6 @@ llvm::Value* EmitComparison(llvm::CmpInst::Predicate predicate, void EmitLogging(const char* tag, llvm::Value* value, llvm::IRBuilder<>* ir_builder); -// Adds TBAA metadata to a load or store instruction using the given shape as -// it's type. The is_pointer_to parameter is used to indicate whether or not -// this instruction loads or stores a pointer to an array. -void SetTbaaForInstruction(llvm::Instruction* instruction, Shape shape, - bool is_pointer_to); - // Adds alignment metadata to a load instruction using the given alignment. // The alignment refers to the result of the load, not the load itself. 
void SetAlignmentMetadataForLoad(llvm::LoadInst* load, uint64_t alignment); diff --git a/tensorflow/compiler/xla/service/llvm_ir/ops.cc b/tensorflow/compiler/xla/service/llvm_ir/ops.cc index 3965433494..60777bc8a8 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ops.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ops.cc @@ -89,7 +89,6 @@ llvm::Value* EmitGetTupleElement(const Shape& target_shape, int64 index, llvm::Value* element_ptr = ir_builder->CreateInBoundsGEP( operand, {ir_builder->getInt64(0), ir_builder->getInt64(index)}); llvm::LoadInst* src_buffer = ir_builder->CreateLoad(element_ptr); - SetTbaaForInstruction(src_buffer, target_shape, /*is_pointer_to=*/true); SetAlignmentMetadataForLoad(src_buffer, alignment); llvm::Type* element_type = ShapeToIrType(target_shape, ir_builder); llvm::Value* ret_val = -- GitLab From b1c095a28a7aa9bbee4af4d9a7e9d0c60567765b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 13:01:33 -0700 Subject: [PATCH 133/909] Bugfix: Ensure tf.distributions.Multinomial doesn't underflow in log_prob. 
PiperOrigin-RevId: 171331659 --- .../python/kernel_tests/distributions/multinomial_test.py | 7 +++++++ tensorflow/python/ops/distributions/multinomial.py | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py index 80caf10391..614a34f077 100644 --- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py +++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py @@ -76,6 +76,13 @@ class MultinomialTest(test.TestCase): self.assertAllClose(p, multinom.probs.eval()) self.assertAllClose(logits, multinom.logits.eval()) + def testPmfUnderflow(self): + logits = np.array([[-200, 0]], dtype=np.float32) + with self.test_session(): + dist = multinomial.Multinomial(total_count=1., logits=logits) + lp = dist.log_prob([1., 0.]).eval()[0] + self.assertAllClose(-200, lp, atol=0, rtol=1e-6) + def testPmfandCountsAgree(self): p = [[0.1, 0.2, 0.7]] n = [[5.]] diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index 9b15d4c76e..00b5697c83 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -24,6 +24,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import util as distribution_util @@ -260,7 +261,7 @@ class Multinomial(distribution.Distribution): def _log_unnormalized_prob(self, counts): counts = self._maybe_assert_valid_sample(counts) - return math_ops.reduce_sum(counts * math_ops.log(self.probs), -1) + return math_ops.reduce_sum(counts * 
nn_ops.log_softmax(self.logits), -1) def _log_normalization(self, counts): counts = self._maybe_assert_valid_sample(counts) -- GitLab From 129947535edd50225b7a6bbe620ea58c6d32953c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 13:15:12 -0700 Subject: [PATCH 134/909] Fixed a typo in a message from the debugger. PiperOrigin-RevId: 171333405 --- tensorflow/python/debug/cli/cli_shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/debug/cli/cli_shared.py b/tensorflow/python/debug/cli/cli_shared.py index c3c9a332a7..df972eacf7 100644 --- a/tensorflow/python/debug/cli/cli_shared.py +++ b/tensorflow/python/debug/cli/cli_shared.py @@ -347,7 +347,7 @@ def get_run_start_intro(run_call_count, out = debugger_cli_common.RichTextLines(_HORIZONTAL_BAR) if is_callable_runner: - out.append("Running a runner returned by Session.make_callabe()") + out.append("Running a runner returned by Session.make_callable()") else: out.append("Session.run() call #%d:" % run_call_count) out.append("") -- GitLab From 2a90713ef70f01392ac59899ca92376549c57126 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 6 Oct 2017 13:25:28 -0700 Subject: [PATCH 135/909] [XLA:CPU] Mark pointers loaded via get-tuple-element as dereferenceable. 
PiperOrigin-RevId: 171334827 --- tensorflow/compiler/xla/service/llvm_ir/ops.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/compiler/xla/service/llvm_ir/ops.cc b/tensorflow/compiler/xla/service/llvm_ir/ops.cc index 60777bc8a8..ae5c666b7d 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ops.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ops.cc @@ -89,7 +89,15 @@ llvm::Value* EmitGetTupleElement(const Shape& target_shape, int64 index, llvm::Value* element_ptr = ir_builder->CreateInBoundsGEP( operand, {ir_builder->getInt64(0), ir_builder->getInt64(index)}); llvm::LoadInst* src_buffer = ir_builder->CreateLoad(element_ptr); + + // Mark the loaded pointer as dereferenceable if we know its shape. + if (!ShapeUtil::IsOpaque(target_shape)) { + SetDereferenceableMetadataForLoad( + src_buffer, + ByteSizeOf(target_shape, src_buffer->getModule()->getDataLayout())); + } SetAlignmentMetadataForLoad(src_buffer, alignment); + llvm::Type* element_type = ShapeToIrType(target_shape, ir_builder); llvm::Value* ret_val = ir_builder->CreateBitCast(src_buffer, element_type->getPointerTo()); -- GitLab From 30c5f4347b722961a40eab483f2391a92d9088bb Mon Sep 17 00:00:00 2001 From: Michael Case Date: Fri, 6 Oct 2017 13:52:17 -0700 Subject: [PATCH 136/909] Fix float32 precision causing test failure in gcs cloud TF tests. The time in nanoseconds was being cast to float32 which caused loss of precision. Because floats are used when parsing the time, the time calculation can still be rounded incorrectly. Also changing EXPECT_EQ to EXPECT_NEAR(,,1). 
PiperOrigin-RevId: 171338952 --- tensorflow/core/platform/cloud/BUILD | 2 -- tensorflow/core/platform/cloud/gcs_file_system_test.cc | 4 ++-- tensorflow/core/platform/cloud/time_util.cc | 3 ++- tensorflow/core/platform/cloud/time_util_test.cc | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD index c06004e747..c937fea049 100644 --- a/tensorflow/core/platform/cloud/BUILD +++ b/tensorflow/core/platform/cloud/BUILD @@ -228,7 +228,6 @@ tf_cc_test( name = "gcs_file_system_test", size = "small", srcs = ["gcs_file_system_test.cc"], - tags = ["nomac"], # b/67103845 deps = [ ":gcs_file_system", ":http_request_fake", @@ -304,7 +303,6 @@ tf_cc_test( name = "time_util_test", size = "small", srcs = ["time_util_test.cc"], - tags = ["nomac"], # b/67103845 deps = [ ":time_util", "//tensorflow/core:test", diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index b8573e335d..911176365f 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -1637,7 +1637,7 @@ TEST(GcsFileSystemTest, Stat_Object) { FileStatistics stat; TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat)); EXPECT_EQ(1010, stat.length); - EXPECT_EQ(1461971724896, stat.mtime_nsec / 1000 / 1000); + EXPECT_NEAR(1461971724896, stat.mtime_nsec / 1000 / 1000, 1); EXPECT_FALSE(stat.is_directory); } @@ -1771,7 +1771,7 @@ TEST(GcsFileSystemTest, Stat_Cache) { FileStatistics stat; TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat)); EXPECT_EQ(1010, stat.length); - EXPECT_EQ(1461971724896, stat.mtime_nsec / 1000 / 1000); + EXPECT_NEAR(1461971724896, stat.mtime_nsec / 1000 / 1000, 1); EXPECT_FALSE(stat.is_directory); TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder", &stat)); EXPECT_EQ(0, stat.length); diff --git a/tensorflow/core/platform/cloud/time_util.cc 
b/tensorflow/core/platform/cloud/time_util.cc index 633733a21c..2f8643f3c7 100644 --- a/tensorflow/core/platform/cloud/time_util.cc +++ b/tensorflow/core/platform/cloud/time_util.cc @@ -44,7 +44,8 @@ Status ParseRfc3339Time(const string& time, int64* mtime_nsec) { parsed.tm_sec = int_seconds; *mtime_nsec = timegm(&parsed) * kNanosecondsPerSecond + - floor((seconds - int_seconds) * kNanosecondsPerSecond); + static_cast( + floor((seconds - int_seconds) * kNanosecondsPerSecond)); return Status::OK(); } diff --git a/tensorflow/core/platform/cloud/time_util_test.cc b/tensorflow/core/platform/cloud/time_util_test.cc index 3fd8fcdab0..1f975f7325 100644 --- a/tensorflow/core/platform/cloud/time_util_test.cc +++ b/tensorflow/core/platform/cloud/time_util_test.cc @@ -23,7 +23,7 @@ TEST(TimeUtil, ParseRfc3339Time) { int64 mtime_nsec; TF_EXPECT_OK(ParseRfc3339Time("2016-04-29T23:15:24.896Z", &mtime_nsec)); // Compare milliseconds instead of nanoseconds. - EXPECT_EQ(1461971724896, mtime_nsec / 1000 / 1000); + EXPECT_NEAR(1461971724896, mtime_nsec / 1000 / 1000, 1); } TEST(TimeUtil, ParseRfc3339Time_ParseError) { -- GitLab From ac2e086d1811be3d41b14f79d9c5c71ec98a1105 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 6 Oct 2017 14:20:41 -0700 Subject: [PATCH 137/909] Explicitly tag constants in LLVM IR with required alignment (We are most likely getting lucky with this today, but this will eventually blow up.) 
PiperOrigin-RevId: 171343275 --- tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 85f790a717..8132207699 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -281,6 +281,7 @@ Status IrEmitter::HandleConstant(HloInstruction* constant, /*Linkage=*/llvm::GlobalValue::PrivateLinkage, /*Initializer=*/initializer, /*Name=*/""); + global_for_const->setAlignment(MinimumAlignmentForShape(literal.shape())); emitted_value_[constant] = global_for_const; VLOG(2) << " emitted value: " << llvm_ir::DumpToString(*global_for_const); VLOG(2) << " its type: " -- GitLab From bbfef93661ebf8ec23c7b9ad920313be9898bbbc Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Fri, 6 Oct 2017 14:47:55 -0700 Subject: [PATCH 138/909] Convert shape to TensorShape when creating _VariableFromResource Ensures that variable shapes are TensorShapes when accessed in graph_callable functions. 
PiperOrigin-RevId: 171347097 --- tensorflow/python/eager/graph_callable.py | 3 ++- tensorflow/python/eager/graph_callable_test.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index 64d1659993..e3aacbd140 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -28,6 +28,7 @@ from tensorflow.python.eager import tape from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops as tf_ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope @@ -54,7 +55,7 @@ class _VariableFromResource(resource_variable_ops.ResourceVariable): def __init__(self, resource, dtype, name, shape): self._handle = resource - self._graph_shape = shape + self._graph_shape = tensor_shape.as_shape(shape) self._handle_device = resource.device self._handle_name = name self._cached_value = None diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py index 4ad8f1f36e..104e019391 100644 --- a/tensorflow/python/eager/graph_callable_test.py +++ b/tensorflow/python/eager/graph_callable_test.py @@ -22,6 +22,7 @@ from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import function +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope @@ -209,6 +210,15 @@ class GraphCallableTest(test.TestCase): ret = my_op(inputs) self.assertEqual(ret[1].numpy(), 11.) 
+ def testVariableShapeIsTensorShape(self): + @graph_callable.graph_callable([]) + def my_function(): + v = variable_scope.get_variable( + "v", initializer=init_ops.zeros_initializer(), shape=()) + self.assertIsInstance(v.get_shape(), tensor_shape.TensorShape) + + my_function() + if __name__ == "__main__": test.main() -- GitLab From eb1a0a5294b9b7b209d419b4113fb57d6443b45f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 14:53:56 -0700 Subject: [PATCH 139/909] (1) Adds broadcasting to scaled_softplus (2) Adds the ability to clip (so we can get a soft version of relu6) PiperOrigin-RevId: 171347879 --- .../contrib/nn/python/ops/scaled_softplus.py | 82 ++++++++++++++----- .../nn/python/ops/scaled_softplus_test.py | 23 ++++-- 2 files changed, 77 insertions(+), 28 deletions(-) diff --git a/tensorflow/contrib/nn/python/ops/scaled_softplus.py b/tensorflow/contrib/nn/python/ops/scaled_softplus.py index 5fc11d8ec6..fcbfbc239c 100644 --- a/tensorflow/contrib/nn/python/ops/scaled_softplus.py +++ b/tensorflow/contrib/nn/python/ops/scaled_softplus.py @@ -20,58 +20,96 @@ from __future__ import print_function from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn -def scaled_softplus(x, alpha, name=None): - """Returns `alpha * ln(1 + exp(x / alpha))`, for scalar `alpha > 0`. 
+def _reduce_and_reshape_grad(g, t): + """Returns the gradient, sum-reduced and reshaped to `t`'s shape.""" + shape = array_ops.shape(t) + g_shape = array_ops.shape(g) + # pylint: disable=protected-access + bcast_dims, _ = gen_array_ops._broadcast_gradient_args(shape, g_shape) + # pylint: enable=protected-access + return array_ops.reshape(math_ops.reduce_sum(g, bcast_dims), shape) + + +def scaled_softplus(x, alpha, clip=None, name=None): + """Returns `y = alpha * ln(1 + exp(x / alpha))` or `min(y, clip)`. This can be seen as a softplus applied to the scaled input, with the output appropriately scaled. As `alpha` tends to 0, `scaled_softplus(x, alpha)` tends - to `relu(x)`. + to `relu(x)`. The clipping is optional. As alpha->0, scaled_softplus(x, alpha) + tends to relu(x), and scaled_softplus(x, alpha, clip=6) tends to relu6(x). Note: the gradient for this operation is defined to depend on the backprop inputs as well as the outputs of this operation. Args: x: A `Tensor` of inputs. - alpha: A scalar `Tensor`, indicating the amount of smoothness. The caller + alpha: A `Tensor`, indicating the amount of smoothness. The caller must ensure that `alpha > 0`. + clip: (optional) A `Tensor`, the upper bound to clip the values. name: A name for the scope of the operations (optional). Returns: - A tensor of same size and type as `x`. + A tensor of the size and type determined by broadcasting of the inputs. """ - with ops.name_scope(name, 'scaled_softplus', [x, alpha]): + clipping = clip is not None + with ops.name_scope(name, 'scaled_softplus', + [x, alpha] + ([clip] if clipping else [])): x = ops.convert_to_tensor(x, name='x') dtype = x.dtype alpha = ops.convert_to_tensor(alpha, dtype=dtype, name='alpha') - # Verify that alpha is a scalar. - alpha.get_shape().assert_has_rank(0) + # Compute the forward value. 
+ y = alpha * nn.softplus(x / alpha) + if clipping: + clip = ops.convert_to_tensor(clip, dtype=dtype, name='clip') + y = math_ops.minimum(y, clip) def _grad(op, g): - """Backprop for scaled softplus.""" - y = op.outputs[0] - alpha = op.inputs[1] - # Prevent the expensive computations from happening before g is available. + """Backprop for scaled softplus, with optional clipping.""" + y, x, alpha = op.inputs[:3] + # Prevent the memory-expensive computations from happening before g is + # available. with ops.control_dependencies([g]): - y /= alpha + y = array_ops.identity(y) + clip_grad = [] + if clipping: + clip = op.inputs[3] + unclipped = math_ops.cast(y < clip, g.dtype) + clip_grad = [_reduce_and_reshape_grad(g * (1. - unclipped), clip)] + g *= unclipped + y /= alpha emy = math_ops.exp(-y) dy_dx = 1. - emy # The eps below avoids log(0). Note that t*log(t) -> 0 as t->0. eps = 1e-8 dy_dalpha = y * emy - dy_dx * math_ops.log(dy_dx + eps) - return g * dy_dx, math_ops.reduce_sum(g * dy_dalpha) + # Backprop to the actual inputs, but not to the output. + return [None, + _reduce_and_reshape_grad(g * dy_dx, x), + _reduce_and_reshape_grad(g * dy_dalpha, alpha)] + clip_grad - @function.Defun(dtype, dtype, - func_name='ScaledSoftplus_%s' % dtype.name, - shape_func=lambda op: [op.inputs[0].get_shape()], + if clipping: + @function.Defun(dtype, dtype, dtype, dtype, + func_name='ScaledSoftplusHelper_clip_%s' % dtype.name, + shape_func=lambda op: [op.inputs[0].shape], + python_grad_func=_grad) + def _forward_helper_clip(y, x, alpha, clip): + del x, alpha, clip # Unused. + return y + return _forward_helper_clip(y, x, alpha, clip) + # No clipping. 
+ @function.Defun(dtype, dtype, dtype, + func_name='ScaledSoftplusHelper_%s' % dtype.name, + shape_func=lambda op: [op.inputs[0].shape], python_grad_func=_grad) - def _forward(x, alpha): - """Forward computation of scaled softplus.""" - return alpha * nn.softplus(x / alpha) - - return _forward(x, alpha) + def _forward_helper(y, x, alpha): + del x, alpha # Unused. + return y + return _forward_helper(y, x, alpha) diff --git a/tensorflow/contrib/nn/python/ops/scaled_softplus_test.py b/tensorflow/contrib/nn/python/ops/scaled_softplus_test.py index 3a459330ce..b978343c6a 100644 --- a/tensorflow/contrib/nn/python/ops/scaled_softplus_test.py +++ b/tensorflow/contrib/nn/python/ops/scaled_softplus_test.py @@ -33,10 +33,11 @@ class ScaledSoftplusTest(test.TestCase): x = np.random.randn(3, 4).astype(np.float32) x64 = np.random.randn(3, 4).astype(np.float64) alpha = np.random.rand() + 0.01 - y = alpha * np.log(1. + np.exp(x / alpha)) + clip = np.float32(0.1) + y = np.minimum(alpha * np.log(1. + np.exp(x / alpha)), clip) y64 = alpha * np.log(1. + np.exp(x64 / alpha)) with self.test_session(use_gpu=True) as sess: - z = scaled_softplus(constant_op.constant(x), alpha) + z = scaled_softplus(constant_op.constant(x), alpha, clip) z64 = scaled_softplus(constant_op.constant(x64), alpha) z, z64 = sess.run([z, z64]) eps = 1e-6 @@ -47,18 +48,28 @@ class ScaledSoftplusTest(test.TestCase): np.random.seed(1) # Make it reproducible. x_shape = [5, 10] x_np = np.random.randn(*x_shape).astype(np.float32) - alpha_np = np.float32(np.random.rand() + 0.01) + alpha_np = np.float32(np.random.rand(1, x_shape[1]) + 0.01) + clip_np = np.float32(np.random.rand(x_shape[0], 1) * 5.) 
with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np) alpha_tf = constant_op.constant(alpha_np) + clip_tf = constant_op.constant(clip_np) y_tf = scaled_softplus(x_tf, alpha_tf) + z_tf = scaled_softplus(x_tf, alpha_tf, clip_tf * 0.1) err = gradient_checker.compute_gradient_error([x_tf, alpha_tf], - [x_shape, []], + [x_shape, alpha_np.shape], y_tf, x_shape, [x_np, alpha_np], - delta=1e-2) - eps = 1e-4 + delta=0.002) + err_clip = gradient_checker.compute_gradient_error( + [x_tf, alpha_tf, clip_tf], + [x_shape, alpha_np.shape, clip_np.shape], + z_tf, x_shape, + [x_np, alpha_np, clip_np], + delta=0.002) + eps = 2e-4 self.assertLess(err, eps) + self.assertLess(err_clip, eps) if __name__ == '__main__': -- GitLab From e744cca9861b175f93e3e2bd72b38731a9f1fca7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 14:55:17 -0700 Subject: [PATCH 140/909] Changes Relu6Grad to depend on relu6's output rather than its input, for consistency with relu. This would result in memory savings when training conv->relu6->bn and conv->bn->relu6->conv models, as the inputs to bn and conv are already retained for backprop. PiperOrigin-RevId: 171348086 --- tensorflow/core/kernels/relu_op_functor.h | 7 ++++--- tensorflow/core/ops/nn_ops.cc | 3 ++- tensorflow/python/ops/nn_grad.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h index 9577b963c6..24b789c543 100644 --- a/tensorflow/core/kernels/relu_op_functor.h +++ b/tensorflow/core/kernels/relu_op_functor.h @@ -76,14 +76,15 @@ struct Relu6Grad { // Computes Relu6Grad backprops. // // gradients: gradients backpropagated to the Relu6 op. - // features: inputs that where passed to the Relu6 op. + // features: inputs that where passed to the Relu6 op, or its outputs. // backprops: gradients to backpropagate to the Relu6 inputs. 
void operator()(const Device& d, typename TTypes::ConstTensor gradients, typename TTypes::ConstTensor features, typename TTypes::Tensor backprops) { // NOTE: When the activation is exactly zero or six, we - // arbitrarily choose to not propagate the associated gradient - // value. + // make sure not to propagate the associated gradient + // value. This allows "features" to be either the input or the output of + // the relu6. backprops.device(d) = gradients * ((features > static_cast(0)) * (features < static_cast(6))) diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index b34dc1a008..5efa55b496 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1851,7 +1851,8 @@ REGISTER_OP("Relu6Grad") Computes rectified linear 6 gradients for a Relu6 operation. gradients: The backpropagated gradients to the corresponding Relu6 operation. -features: The features passed as input to the corresponding Relu6 operation. +features: The features passed as input to the corresponding Relu6 operation, or + its output; using either one produces the same result. backprops: The gradients: `gradients * (features > 0) * (features < 6)`. )doc"); diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 7dcd72968a..af610d8fdb 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -349,7 +349,7 @@ def _SeluGradGrad(op, grad): @ops.RegisterGradient("Relu6") def _Relu6Grad(op, grad): - return gen_nn_ops._relu6_grad(grad, op.inputs[0]) + return gen_nn_ops._relu6_grad(grad, op.outputs[0]) # pylint: disable=protected-access @ops.RegisterGradient("Elu") -- GitLab From 25e6d2331b9e79df9e7a1f296ecc02064ff7c43e Mon Sep 17 00:00:00 2001 From: Vinu Rajashekhar Date: Fri, 6 Oct 2017 15:09:16 -0700 Subject: [PATCH 141/909] Adds helpers for bucketing strategies for TF monitoring samplers. - Adds explicit and exponential strategies for now. 
PiperOrigin-RevId: 171350246 --- .../monitoring/collection_registry_test.cc | 4 +- .../core/lib/monitoring/mobile_sampler.h | 37 +++++- tensorflow/core/lib/monitoring/sampler.cc | 112 ++++++++++++++++++ tensorflow/core/lib/monitoring/sampler.h | 66 ++++++----- .../core/lib/monitoring/sampler_test.cc | 35 +++++- 5 files changed, 216 insertions(+), 38 deletions(-) create mode 100644 tensorflow/core/lib/monitoring/sampler.cc diff --git a/tensorflow/core/lib/monitoring/collection_registry_test.cc b/tensorflow/core/lib/monitoring/collection_registry_test.cc index 34a480b07d..5b9c100690 100644 --- a/tensorflow/core/lib/monitoring/collection_registry_test.cc +++ b/tensorflow/core/lib/monitoring/collection_registry_test.cc @@ -188,10 +188,10 @@ TEST(CollectMetricsTest, Sampler) { auto sampler_with_labels = std::unique_ptr>( Sampler<2>::New({"/tensorflow/test/sampler_with_labels", "Sampler with labels.", "MyLabel0", "MyLabel1"}, - {1.0, 2.0})); + Buckets::Explicit({1.0, 2.0}))); auto sampler_without_labels = std::unique_ptr>(Sampler<0>::New( {"/tensorflow/test/sampler_without_labels", "Sampler without labels."}, - {0.0})); + Buckets::Explicit({0.0}))); Histogram with_labels0({1.0, 2.0, DBL_MAX}); sampler_with_labels->GetCell("Label00", "Label10")->Add(0.7); diff --git a/tensorflow/core/lib/monitoring/mobile_sampler.h b/tensorflow/core/lib/monitoring/mobile_sampler.h index 5499237347..cf390e5c7f 100644 --- a/tensorflow/core/lib/monitoring/mobile_sampler.h +++ b/tensorflow/core/lib/monitoring/mobile_sampler.h @@ -18,7 +18,10 @@ limitations under the License. 
#ifndef THIRD_PARTY_TENSORFLOW_CORE_LIB_MONITORING_MOBILE_SAMPLER_H_ #define THIRD_PARTY_TENSORFLOW_CORE_LIB_MONITORING_MOBILE_SAMPLER_H_ +#include + #include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/lib/monitoring/metric_def.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" @@ -38,6 +41,33 @@ class SamplerCell { TF_DISALLOW_COPY_AND_ASSIGN(SamplerCell); }; +// Buckets which has a null implementation. +class Buckets { + public: + Buckets() = default; + ~Buckets() = default; + + static std::unique_ptr Explicit( + std::initializer_list bucket_limits) { + return std::unique_ptr(new Buckets()); + } + + static std::unique_ptr Exponential(double scale, + double growth_factor, + int bucket_count) { + return std::unique_ptr(new Buckets()); + } + + const std::vector& explicit_bounds() const { + return explicit_bounds_; + } + + private: + std::vector explicit_bounds_; + + TF_DISALLOW_COPY_AND_ASSIGN(Buckets); +}; + // Sampler which has a null implementation. template class Sampler { @@ -47,8 +77,8 @@ class Sampler { template static Sampler* New(const MetricDef& metric_def, - const std::vector& explicit_bucket_limits) { - return new Sampler(); + std::unique_ptr buckets) { + return new Sampler(std::move(buckets)); } template @@ -57,9 +87,10 @@ class Sampler { } private: - Sampler() {} + Sampler(std::unique_ptr buckets) : buckets_(std::move(buckets)) {} SamplerCell default_sampler_cell_; + std::unique_ptr buckets_; TF_DISALLOW_COPY_AND_ASSIGN(Sampler); }; diff --git a/tensorflow/core/lib/monitoring/sampler.cc b/tensorflow/core/lib/monitoring/sampler.cc new file mode 100644 index 0000000000..23d3668fbd --- /dev/null +++ b/tensorflow/core/lib/monitoring/sampler.cc @@ -0,0 +1,112 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/lib/monitoring/sampler.h" + +// We replace this implementation with a null implementation for mobile +// platforms. +#include "tensorflow/core/platform/platform.h" +#ifdef IS_MOBILE_PLATFORM +// Do nothing. +#else + +namespace tensorflow { +namespace monitoring { +namespace { + +class ExplicitBuckets : public Buckets { + public: + ~ExplicitBuckets() override = default; + + explicit ExplicitBuckets(std::vector bucket_limits) + : bucket_limits_(std::move(bucket_limits)) { + CHECK_GT(bucket_limits_.size(), 0); + // Verify that the bucket boundaries are strictly increasing + for (size_t i = 1; i < bucket_limits_.size(); i++) { + CHECK_GT(bucket_limits_[i], bucket_limits_[i - 1]); + } + // We augment the bucket limits so that all boundaries are within [-DBL_MAX, + // DBL_MAX]. + // + // Since we use ThreadSafeHistogram, we don't have to explicitly add + // -DBL_MAX, because it uses these limits as upper-bounds, so + // bucket_count[0] is always the number of elements in + // [-DBL_MAX, bucket_limits[0]). 
+ if (bucket_limits_.back() != DBL_MAX) { + bucket_limits_.push_back(DBL_MAX); + } + } + + const std::vector& explicit_bounds() const override { + return bucket_limits_; + } + + private: + std::vector bucket_limits_; + + TF_DISALLOW_COPY_AND_ASSIGN(ExplicitBuckets); +}; + +class ExponentialBuckets : public Buckets { + public: + ~ExponentialBuckets() override = default; + + ExponentialBuckets(double scale, double growth_factor, int bucket_count) + : explicit_buckets_( + ComputeBucketLimits(scale, growth_factor, bucket_count)) {} + + const std::vector& explicit_bounds() const override { + return explicit_buckets_.explicit_bounds(); + } + + private: + static std::vector ComputeBucketLimits(double scale, + double growth_factor, + int bucket_count) { + CHECK_GT(bucket_count, 0); + std::vector bucket_limits; + double bound = scale; + for (int i = 0; i < bucket_count; i++) { + bucket_limits.push_back(bound); + bound *= growth_factor; + } + return bucket_limits; + } + + ExplicitBuckets explicit_buckets_; + + TF_DISALLOW_COPY_AND_ASSIGN(ExponentialBuckets); +}; + +} // namespace + +// static +std::unique_ptr Buckets::Explicit( + std::initializer_list bucket_limits) { + return std::unique_ptr(new ExplicitBuckets(bucket_limits)); +} + +// static +std::unique_ptr Buckets::Exponential(double scale, + double growth_factor, + int bucket_count) { + return std::unique_ptr( + new ExponentialBuckets(scale, growth_factor, bucket_count)); +} + +} // namespace monitoring +} // namespace tensorflow + +#endif // IS_MOBILE_PLATFORM diff --git a/tensorflow/core/lib/monitoring/sampler.h b/tensorflow/core/lib/monitoring/sampler.h index 3932f8d1a7..5a4d49d5d4 100644 --- a/tensorflow/core/lib/monitoring/sampler.h +++ b/tensorflow/core/lib/monitoring/sampler.h @@ -65,12 +65,40 @@ class SamplerCell { TF_DISALLOW_COPY_AND_ASSIGN(SamplerCell); }; +// Bucketing strategies for the samplers. +// +// We automatically add -DBL_MAX and DBL_MAX to the ranges, so that no sample +// goes out of bounds. 
+// +// WARNING: If you are changing the interface here, please do change the same in +// mobile_sampler.h. +class Buckets { + public: + virtual ~Buckets() = default; + + // Sets up buckets of the form: + // [-DBL_MAX, ..., scale * growth^i, + // scale * growth_factor^(i + 1), ..., DBL_MAX]. + // + // So for powers of 2 with a bucket count of 10, you would say (1, 2, 10) + static std::unique_ptr Exponential(double scale, + double growth_factor, + int bucket_count); + + // Sets up buckets of the form: + // [-DBL_MAX, ..., bucket_limits[i], bucket_limits[i + 1], ..., DBL_MAX]. + static std::unique_ptr Explicit( + std::initializer_list bucket_limits); + + virtual const std::vector& explicit_bounds() const = 0; +}; + // A stateful class for updating a cumulative histogram metric. // // This class encapsulates a set of histograms (or a single histogram for a // label-less metric) configured with a list of increasing bucket boundaries. -// Each histogram is identified by a tuple of labels. The class allows the user -// to add a sample to each histogram value. +// Each histogram is identified by a tuple of labels. The class allows the +// user to add a sample to each histogram value. // // Sampler allocates storage and maintains a cell for each value. You can // retrieve an individual cell using a label-tuple and update it separately. @@ -86,21 +114,14 @@ class Sampler { registration_handle_.reset(); } - // Creates the metric based on the metric-definition arguments. + // Creates the metric based on the metric-definition arguments and buckets. // // Example; // auto* sampler_with_label = Sampler<1>::New({"/tensorflow/sampler", // "Tensorflow sampler", "MyLabelName"}, {10.0, 20.0, 30.0}); - // - // We automatically add -DBL_MAX and DBL_MAX to the list of bucket limits, so - // that no sample goes out of bounds. 
So for the above example, the ranges end - // up being: [-DBL_Max, 10.0, 20.0, 30.0, DBL_MAX] - // - // REQUIRES: bucket_limits[i] values are monotonically increasing. - // REQUIRES: bucket_limits is not empty(). static Sampler* New(const MetricDef& metric_def, - const std::vector& bucket_limits); + std::unique_ptr buckets); // Retrieves the cell for the specified labels, creating it on demand if // not already present. @@ -112,9 +133,9 @@ class Sampler { Sampler(const MetricDef& metric_def, - const std::vector& bucket_limits) + std::unique_ptr buckets) : metric_def_(metric_def), - bucket_limits_(bucket_limits), + buckets_(std::move(buckets)), registration_handle_(CollectionRegistry::Default()->Register( &metric_def_, [&](MetricCollectorGetter getter) { auto metric_collector = getter.Get(&metric_def_); @@ -133,7 +154,7 @@ class Sampler { metric_def_; // Bucket limits for the histograms in the cells. - const std::vector bucket_limits_; + std::unique_ptr buckets_; // Registration handle with the CollectionRegistry. std::unique_ptr registration_handle_; @@ -162,19 +183,8 @@ template Sampler* Sampler::New( const MetricDef& metric_def, - const std::vector& bucket_limits) { - CHECK_GT(bucket_limits.size(), 0); - // Verify that the bucket boundaries are strictly increasing - for (size_t i = 1; i < bucket_limits.size(); i++) { - CHECK_GT(bucket_limits[i], bucket_limits[i - 1]); - } - std::vector augmented_bucket_limits(bucket_limits); - // We add DBL_MAX to the end so that all boundaries are within [-DBL_MAX, - // DBL_MAX]. - if (bucket_limits.back() != DBL_MAX) { - augmented_bucket_limits.push_back(DBL_MAX); - } - return new Sampler(metric_def, augmented_bucket_limits); + std::unique_ptr buckets) { + return new Sampler(metric_def, std::move(buckets)); } template @@ -196,7 +206,7 @@ SamplerCell* Sampler::GetCell(const Labels&... 
labels) return &(cells_ .emplace(std::piecewise_construct, std::forward_as_tuple(label_array), - std::forward_as_tuple(bucket_limits_)) + std::forward_as_tuple(buckets_->explicit_bounds())) .first->second); } diff --git a/tensorflow/core/lib/monitoring/sampler_test.cc b/tensorflow/core/lib/monitoring/sampler_test.cc index 27e1ccca3c..d61d858b6b 100644 --- a/tensorflow/core/lib/monitoring/sampler_test.cc +++ b/tensorflow/core/lib/monitoring/sampler_test.cc @@ -34,14 +34,14 @@ void EqHistograms(const Histogram& expected, auto* sampler_with_labels = Sampler<1>::New({"/tensorflow/test/sampler_with_labels", "Sampler with one label.", "MyLabel"}, - {10.0, 20.0}); + Buckets::Explicit({10.0, 20.0})); TEST(LabeledSamplerTest, InitializedEmpty) { Histogram empty; EqHistograms(empty, sampler_with_labels->GetCell("Empty")->value()); } -TEST(LabeledSamplerTest, BucketBoundaries) { +TEST(LabeledSamplerTest, ExplicitBucketBoundaries) { // Sampler automatically adds DBL_MAX to the list of buckets. Histogram expected({10.0, 20.0, DBL_MAX}); auto* cell = sampler_with_labels->GetCell("BucketBoundaries"); @@ -61,7 +61,7 @@ TEST(LabeledSamplerTest, BucketBoundaries) { auto* init_sampler_without_labels = Sampler<0>::New({"/tensorflow/test/init_sampler_without_labels", "Sampler without labels initialized as empty."}, - {1.5, 2.8}); + Buckets::Explicit({1.5, 2.8})); TEST(UnlabeledSamplerTest, InitializedEmpty) { Histogram empty; @@ -71,9 +71,9 @@ TEST(UnlabeledSamplerTest, InitializedEmpty) { auto* sampler_without_labels = Sampler<0>::New({"/tensorflow/test/sampler_without_labels", "Sampler without labels initialized as empty."}, - {1.5, 2.8}); + Buckets::Explicit({1.5, 2.8})); -TEST(UnlabeledSamplerTest, BucketBoundaries) { +TEST(UnlabeledSamplerTest, ExplicitBucketBoundaries) { // Sampler automatically adds DBL_MAX to the list of buckets. 
Histogram expected({1.5, 2.8, DBL_MAX}); auto* cell = sampler_without_labels->GetCell(); @@ -87,6 +87,31 @@ TEST(UnlabeledSamplerTest, BucketBoundaries) { EqHistograms(expected, cell->value()); } +auto* sampler_with_exponential = + Sampler<1>::New({"/tensorflow/test/sampler_with_exponential", + "Sampler with exponential buckets.", "MyLabel"}, + // So limits are {1, 2, 4}. + Buckets::Exponential(1, 2, 3)); + +TEST(ExponentialSamplerTest, ExponentialBucketBoundaries) { + // Sampler automatically adds DBL_MAX to the list of buckets. + Histogram expected({1.0, 2.0, 4.0, DBL_MAX}); + auto* cell = sampler_with_exponential->GetCell("BucketBoundaries"); + sampler_with_exponential->GetCell("AddedToCheckPreviousCellValidity"); + cell->Add(-1.0); + expected.Add(-1.0); + cell->Add(0.5); + expected.Add(0.5); + cell->Add(1.001); + expected.Add(1.001); + cell->Add(3.999); + expected.Add(3.999); + cell->Add(6.0); + expected.Add(6.0); + + EqHistograms(expected, cell->value()); +} + } // namespace } // namespace monitoring } // namespace tensorflow -- GitLab From ea513ed3ec78531af1ebdb25b2daf52bd688b4d0 Mon Sep 17 00:00:00 2001 From: ZxYuan Date: Fri, 6 Oct 2017 17:16:28 -0500 Subject: [PATCH 142/909] Update word2vec_basic.py (#13531) Use random.sample to simplify random selection of context words --- tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py index 1fa2b14869..142e45a2e8 100644 --- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py +++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py @@ -115,11 +115,9 @@ def generate_batch(batch_size, num_skips, skip_window): data_index += span for i in range(batch_size // num_skips): context_words = [w for w in range(span) if w != skip_window] - random.shuffle(context_words) - words_to_use = collections.deque(context_words) - for j 
in range(num_skips): + words_to_use = random.sample(context_words, num_skips) + for j, context_word in enumerate(words_to_use): batch[i * num_skips + j] = buffer[skip_window] - context_word = words_to_use.pop() labels[i * num_skips + j, 0] = buffer[context_word] if data_index == len(data): buffer[:] = data[:span] -- GitLab From c5f715f62e7d8c4fbf9244eefb9379f188e06b98 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 15:20:39 -0700 Subject: [PATCH 143/909] Update ops-related pbtxt files. PiperOrigin-RevId: 171351986 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 9cda34a8c8..9abb4f7a5e 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -20208,7 +20208,7 @@ op { } input_arg { name: "features" - description: "The features passed as input to the corresponding Relu6 operation." + description: "The features passed as input to the corresponding Relu6 operation, or\nits output; using either one produces the same result." type_attr: "T" } output_arg { -- GitLab From 710efeecbffad94259bdcf5d19ca3a83043cf145 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Fri, 6 Oct 2017 15:25:16 -0700 Subject: [PATCH 144/909] Bump min graph consumer version when adding functions to it PiperOrigin-RevId: 171352662 --- tensorflow/core/graph/graph.cc | 9 +++++++++ tensorflow/core/graph/graph_partition_test.cc | 5 ++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 2ad0081e1f..daefb6b1fb 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -293,6 +293,11 @@ Graph::Graph(const OpRegistryInterface* ops) Graph::Graph(const FunctionLibraryDefinition& flib_def) : Graph(flib_def.default_registry()) { + // Need a new-enough consumer to support the functions we add to the graph. 
+ if (flib_def.ToProto().function_size() > 0 && + versions_->min_consumer() < 12) { + versions_->set_min_consumer(12); + } Status s = ops_.AddLibrary(flib_def); CHECK(s.ok()) << s.error_message(); } @@ -448,6 +453,10 @@ const Edge* Graph::FindEdge(const Node* dst, int index) { } Status Graph::AddFunctionLibrary(const FunctionDefLibrary& fdef_lib) { + // Need a new-enough consumer to support the functions we add to the graph. + if (fdef_lib.function_size() > 0 && versions_->min_consumer() < 12) { + versions_->set_min_consumer(12); + } return ops_.AddLibrary(fdef_lib); } diff --git a/tensorflow/core/graph/graph_partition_test.cc b/tensorflow/core/graph/graph_partition_test.cc index 858ef8ac01..20822ecb1d 100644 --- a/tensorflow/core/graph/graph_partition_test.cc +++ b/tensorflow/core/graph/graph_partition_test.cc @@ -91,10 +91,9 @@ void Partition(const GraphDef& graph_def, Status s = Partition(popts, &g, partitions); CHECK(s.ok()) << s; - // Check versions + // Check versions. EXPECT_EQ(graph_def.versions().producer(), TF_GRAPH_DEF_VERSION); - EXPECT_EQ(graph_def.versions().min_consumer(), - TF_GRAPH_DEF_VERSION_MIN_CONSUMER); + // Partitions must inherit the versions of the original graph. for (auto& it : *partitions) { EXPECT_EQ(graph_def.versions().producer(), it.second.versions().producer()); EXPECT_EQ(graph_def.versions().min_consumer(), -- GitLab From a713e49e8662b90eea3b5cda9bd50ae4c7546fef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 15:27:37 -0700 Subject: [PATCH 145/909] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 171352952 --- tensorflow/go/op/wrappers.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index f2ee710a9e..804275dda6 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -22810,7 +22810,8 @@ func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...R // // Arguments: // gradients: The backpropagated gradients to the corresponding Relu6 operation. -// features: The features passed as input to the corresponding Relu6 operation. +// features: The features passed as input to the corresponding Relu6 operation, or +// its output; using either one produces the same result. // // Returns The gradients: // `gradients * (features > 0) * (features < 6)`. -- GitLab From d9a969c84b56fc5bca7ddbb58761303cafee94bd Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 6 Oct 2017 15:48:17 -0700 Subject: [PATCH 146/909] Disable some tests on tsan. PiperOrigin-RevId: 171355854 --- tensorflow/python/estimator/BUILD | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 3507d9fedc..22de474013 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -242,7 +242,10 @@ py_test( srcs = ["canned/dnn_test.py"], shard_count = 4, srcs_version = "PY2AND3", - tags = ["no_pip"], + tags = [ + "no_pip", + "notsan", # b/67510291 + ], deps = [ ":dnn", ":dnn_testing_utils", @@ -296,7 +299,10 @@ py_test( srcs = ["canned/dnn_linear_combined_test.py"], shard_count = 8, srcs_version = "PY2AND3", - tags = ["no_pip"], + tags = [ + "no_pip", + "notsan", # b/67510291 + ], deps = [ ":dnn_linear_combined", ":dnn_testing_utils", @@ -373,6 +379,7 @@ py_test( name = "estimator_test", srcs = ["estimator_test.py"], srcs_version = "PY2AND3", + tags = ["notsan"], # b/67510291 deps = [ ":estimator", ":export_export", @@ -646,7 +653,10 
@@ py_test( srcs = ["canned/linear_test.py"], shard_count = 4, srcs_version = "PY2AND3", - tags = ["no_pip"], + tags = [ + "no_pip", + "notsan", # b/67510291 + ], deps = [ ":linear", ":linear_testing_utils", -- GitLab From be893ac19b13a77c645e168b6ab3f835062c4280 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 6 Oct 2017 15:53:53 -0700 Subject: [PATCH 147/909] Clean up our libcuda stub when building the GPU Docker container (#13456) --- tensorflow/tools/docker/Dockerfile.devel-gpu | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 04773376e9..a607e5e27b 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -78,15 +78,18 @@ WORKDIR /tensorflow # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python -ENV LD_LIBRARY_PATH /usr/local/cuda/lib64/stubs:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2,6.0,6.1 -RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 -RUN tensorflow/tools/ci_build/builds/configured GPU \ - bazel build -c opt --config=cuda --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ +RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ + LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \ + tensorflow/tools/ci_build/builds/configured GPU \ + bazel build -c opt --config=cuda \ + --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ tensorflow/tools/pip_package:build_pip_package && \ + rm /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip && \ pip --no-cache-dir install --upgrade /tmp/pip/tensorflow-*.whl && \ rm -rf /tmp/pip && \ -- GitLab From febf2e69608acae22f9b33e54e1088b7e1e0749c 
Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 6 Oct 2017 15:54:01 -0700 Subject: [PATCH 148/909] Update README.md with tf-nightly-gpu --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6339c57c95..24bbb6cec1 100644 --- a/README.md +++ b/README.md @@ -38,10 +38,11 @@ People who are a little more adventurous can also try our nightly binaries: **Nightly pip packages** * We are pleased to announce that TensorFlow now offers nightly pip packages -under the [tf-nightly](https://pypi.python.org/pypi/tf-nightly) project on pypi. -Simply run `pip install tf-nightly` in a clean environment to install the nightly -tensorflow build. We currently only support CPU packages on Linux, Mac, and Windows. -GPU packages on all platforms will arrive soon! +under the [tf-nightly](https://pypi.python.org/pypi/tf-nightly) and +[tf-nightly-gpu](https://pypi.python.org/pypi/tf-nightly-gpu) project on pypi. +Simply run `pip install tf-nightly` or `pip install tf-nightly-gpu` in a clean +environment to install the nightly TensorFlow build. We support CPU and GPU +packages on Linux, Mac, and Windows. **Individual whl files** -- GitLab From 09369376b4ee41eafc674ce7a699fd74ee9468d5 Mon Sep 17 00:00:00 2001 From: melvyniandrag Date: Fri, 6 Oct 2017 19:35:34 -0400 Subject: [PATCH 149/909] modified readme (#13515) --- tensorflow/tools/docker/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index 3780bde2be..2e5a0038ed 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -41,6 +41,7 @@ Note: If you would have a problem running nvidia-docker you may try the old meth we have used. But it is not recommended. If you find a bug in nvidia-docker, please report it there and try using nvidia-docker as described above. 
+ $ # The old, not recommended way to run docker with gpu support: $ export CUDA_SO=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') $ docker run -it -p 8888:8888 $CUDA_SO $DEVICES gcr.io/tensorflow/tensorflow:latest-gpu -- GitLab From 8018fc9385647876b3ce954e4d9a345316526b0b Mon Sep 17 00:00:00 2001 From: "Dr. Kashif Rasul" Date: Sat, 7 Oct 2017 01:36:45 +0200 Subject: [PATCH 150/909] instructions for libcupti for CUDA 8 (#13414) --- tensorflow/docs_src/install/install_linux.md | 14 +++++++++++++- tensorflow/docs_src/install/install_sources.md | 11 +++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 576099f054..14cc1f733c 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -42,8 +42,20 @@ must be installed on your system: a list of supported GPU cards. * The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface. This library provides advanced profiling support. To install this library, - issue the following command: + issue the following command for CUDA Toolkit >= 8.0: +

+    $ sudo apt-get install cuda-command-line-tools
+    
+ + and add its path to your `LD_LIBRARY_PATH` environment variable: + +
 
+    $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64 
+    
+ + For CUDA Toolkit <= 7.5 do: +
     $ sudo apt-get install libcupti-dev
     
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index e6a4088656..3d143506f0 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -137,8 +137,15 @@ The following NVIDIA software must be installed on your system: particularly the description of appending the appropriate pathname to your `LD_LIBRARY_PATH` environment variable. -Finally, you must also install `libcupti-dev` by invoking the following -command: +Finally, you must also install `libcupti` which for Cuda Toolkit >= 8.0 you do via + +
 $ sudo apt-get install cuda-command-line-tools 
+ +and add its path to your `LD_LIBRARY_PATH` environment variable: + +
 $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64 
+ +For Cuda Toolkit <= 7.5, you install `libcupti-dev` by invoking the following command:
 $ sudo apt-get install libcupti-dev 
-- GitLab From 6fc7de9522e0d1ed6f1e1d5fd095fdeb6a31b197 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 16:41:17 -0700 Subject: [PATCH 151/909] Define object-oriented metrics classes that are Eager-safe. PiperOrigin-RevId: 171363240 --- tensorflow/contrib/eager/python/BUILD | 31 +++ tensorflow/contrib/eager/python/metrics.py | 26 +++ .../contrib/eager/python/metrics_impl.py | 197 ++++++++++++++++++ .../contrib/eager/python/metrics_test.py | 59 ++++++ 4 files changed, 313 insertions(+) create mode 100644 tensorflow/contrib/eager/python/metrics.py create mode 100644 tensorflow/contrib/eager/python/metrics_impl.py create mode 100644 tensorflow/contrib/eager/python/metrics_test.py diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 9185c963f7..1a63c901a2 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -11,6 +11,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":datasets", + ":metrics", ":saver", ":summary_writer", "//tensorflow/python:framework_ops", @@ -116,6 +117,36 @@ cuda_py_test( ], ) +py_library( + name = "metrics", + srcs = [ + "metrics.py", + "metrics_impl.py", + ], + srcs_version = "PY2AND3", + visibility = ["//tensorflow:internal"], + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:init_ops", + "//tensorflow/python:layers_base", + "//tensorflow/python:math_ops", + "//tensorflow/python:util", + "//tensorflow/python:variable_scope", + ], +) + +py_test( + name = "metrics_test", + srcs = ["metrics_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":metrics", + "//tensorflow/python/eager:test", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/eager/python/metrics.py b/tensorflow/contrib/eager/python/metrics.py new file mode 100644 index 0000000000..3e31004273 --- /dev/null +++ 
b/tensorflow/contrib/eager/python/metrics.py @@ -0,0 +1,26 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Metrics namespace.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint:disable=wildcard-import +from tensorflow.contrib.eager.python.metrics_impl import * +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = ['Accuracy', 'Mean', 'Metric'] +remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py new file mode 100644 index 0000000000..6bc0ce6dce --- /dev/null +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -0,0 +1,197 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Metrics classes for computing the output of an evaluation.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variable_scope + + +class Metric(object): + """A metric holds state for aggregating statistics over an evaluation run. + + Users will use Network.add_metric() to add Metric objects to their + evaluation network, call them in each step, and then use + Network.all_metric_results() at the end. + + Descendants will implement: + * call(): Should follow this pattern: + if not self.built: + self.var = self.add_variable(...) + self.add_update(self.var.assign_add(...)) + * aggregate(): Adds in the state from a list of metrics of the same type + as `self`. (Default of summing all the variables will be fine for most + descendants.) + * result(): Computes and returns a final value for the metric + from the variables in `self`. + """ + + def __init__(self, name=None): + self.built = False + self._vars = [] + self._updates = [] + self._name = name or self.__class__.__name__ + # TODO(josh11b): Need some way to make sure two Metrics in the same + # Network have distinct names. Maybe we can get a unique name from + # a name/variable scope? + # TODO(josh11b): self._in_graph_mode = context.in_graph_mode() + + # ---- API for users --- + def __call__(self, *args, **kwargs): + # TODO(josh11b): If self._in_graph_mode is true, make self.call() into a + # graph callable here, so that variable updates happen without requiring + # a separate fetch. 
+ # TODO(josh11b): Do we need a separate build() method to separate + # initialization from each update? If so, how do we get the arguments + # to it? We *could* just pass in *args and **kwargs... + if not self.built: + # TODO(ashankar): Set up container isolation so there is no chance + # distinct metrics objects accidentally share variables. + with variable_scope.variable_scope( + self._name, use_resource=True, reuse=False): + ret = self.call(*args, **kwargs) + self.built = True + else: + ret = self.call(*args, **kwargs) + return ret + + @property + def name(self): + return self._name + + @property + def variables(self): + return self._vars + + # ---- To be implemented by descendants --- + def call(self, *args, **kwargs): + """Accumulates statistics for the metric.""" + raise NotImplementedError("Metrics must define a call() member function") + + # We can support two different strategies for doing data-parallel + # distributed metric computations: + # * Put metric variables on the first device and rely on small + # bandwidth needed to do updates. (Doesn't require any particular + # code in Metric implementations.) + # * Ask each type of metric to define an aggregation method to run + # at the end of eval to merge across devices. Note: this is good + # for the use case where they want to record the metric's state + # for each example and then later decide which examples they want + # to aggregate over. (Recommended -- not too much harder and adds + # flexibility over previous option.) + # I'm going with the second strategy since we can define a default + # implementation of aggregate() that will work for most descendants. + def aggregate(self, metrics): + """Adds in the state from a list of metrics. + + Default implementation sums all the metric variables. + + Args: + metrics: A list of metrics with the same type as `self`. + + Raises: + ValueError: If metrics contains invalid data.
+ """ + for m in metrics: + if type(self) != type(m): # pylint: disable=unidiomatic-typecheck + raise TypeError("All metrics must be the same type, '%s' != '%s'." % + (type(self), type(m))) + # pylint: disable=protected-access + for i in range(len(self._vars)): + if any(m._vars[i].name != self._vars[i].name for m in metrics): + raise ValueError("All metrics must have variables in the same order.") + self._vars[i].assign_add(math_ops.add_n([m._vars[i] for m in metrics])) + # pylint: enable=protected-access + + def result(self): + """Computes and returns a final value for the metric.""" + raise NotImplementedError("Metrics must define a result() member function") + + # ---- For use by descendants --- + def add_variable(self, name, shape=None, dtype=None, initializer=None): + """***Only for use by descendants of Metric***.""" + if self.built: + raise RuntimeError("Can't call add_variable() after a Metric has been " + "built in the first call().") + v = variable_scope.get_variable(name, shape, dtype, initializer, + trainable=False, use_resource=True) + self._vars.append(v) + return v + + +class Mean(Metric): + """Computes the (weighted) mean of the given values.""" + # TODO(josh11b): Maybe have a dtype argument that defaults to tf.float64? + # Or defaults to type of the input if it is tf.float32, else tf.float64? + + def call(self, values, weights=None): + """Accumulate statistics for computing the mean. + + For example, if values is [1, 3, 5, 7] then the mean is 4. + If the weights were specified as [1, 1, 0, 0] then the mean would be 2. + + Args: + values: Tensor with the per-example value. + weights: Optional weighting of each example. Defaults to 1. + """ + if not self.built: # False only in the first call(). 
+ self.numer = self.add_variable(name="numer", shape=(), + dtype=dtypes.float64, + initializer=init_ops.zeros_initializer) + self.denom = self.add_variable(name="denom", shape=(), + dtype=dtypes.float64, + initializer=init_ops.zeros_initializer) + if weights is None: + self.denom.assign_add( + math_ops.cast(array_ops.size(values), dtypes.float64)) + values = math_ops.reduce_sum(values) + self.numer.assign_add(math_ops.cast(values, dtypes.float64)) + else: + weights = math_ops.cast(weights, dtypes.float64) + self.denom.assign_add(math_ops.reduce_sum(weights)) + values = math_ops.cast(values, dtypes.float64) * weights + self.numer.assign_add(math_ops.reduce_sum(values)) + + def result(self): + return self.numer / self.denom + + +class Accuracy(Mean): + """Calculates how often `predictions` matches `labels`.""" + + def call(self, labels, predictions, weights=None): + """Accumulate accuracy statistics. + + For example, if labels is [1, 2, 3, 4] and predictions is [0, 2, 3, 4] + then the accuracy is 3/4 or .75. If the weights were specified as + [1, 1, 0, 0] then the accuracy would be 1/2 or .5. + + `labels` and `predictions` should have the same shape and type. + + Args: + labels: Tensor with the true labels for each example. One example + per element of the Tensor. + predictions: Tensor with the predicted label for each example. + weights: Optional weighting of each example. Defaults to 1. + """ + matches = math_ops.equal(labels, predictions) + matches = math_ops.cast(matches, dtypes.float64) + super(Accuracy, self).call(matches, weights=weights) diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py new file mode 100644 index 0000000000..8c2d8081ba --- /dev/null +++ b/tensorflow/contrib/eager/python/metrics_test.py @@ -0,0 +1,59 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.eager.python import metrics +from tensorflow.python.eager import test + + +class MetricsTest(test.TestCase): + + def testMean(self): + m = metrics.Mean() + m([1, 10, 100]) + m(1000) + m([10000.0, 100000.0]) + self.assertEqual(111111.0/6, m.result().numpy()) + + def testWeightedMean(self): + m = metrics.Mean() + m([1, 100, 100000], weights=[1, 0.2, 0.3]) + m([500000, 5000, 500]) # weights of 1 each + self.assertNear(535521/4.5, m.result().numpy(), 0.001) + + def testAccuracy(self): + m = metrics.Accuracy() + m([0, 1, 2, 3], [0, 0, 0, 0]) # 1 correct + m([4], [4]) # 1 correct + m([5], [0]) # 0 correct + m([6], [6]) # 1 correct + m([7], [2]) # 0 correct + self.assertEqual(3.0/8, m.result().numpy()) + + def testWeightedAccuracy(self): + m = metrics.Accuracy() + # 1 correct, total weight of 2 + m([0, 1, 2, 3], [0, 0, 0, 0], weights=[1, 1, 0, 0]) + m([4], [4], weights=[0.5]) # 1 correct with a weight of 0.5 + m([5], [0], weights=[0.5]) # 0 correct, weight 0.5 + m([6], [6]) # 1 correct, weight 1 + m([7], [2]) # 0 correct, weight 1 + self.assertEqual(2.5/5, m.result().numpy()) + + +if __name__ == "__main__": + test.main() -- GitLab From c26542cdaeb4cd815406a8175251ff76cdfbc20a Mon Sep 17 00:00:00 2001 From: 
Justin Lebar Date: Fri, 6 Oct 2017 17:08:19 -0700 Subject: [PATCH 152/909] [XLA] Don't clone and throw away instructions without calling DetachFromOperands. If you clone an instruction and then don't insert it into a computation, it's on you to call DetachFromOperands before destroying it. Otherwise the instruction will stay in its operands' use lists. PiperOrigin-RevId: 171367649 --- .../compiler/xla/service/algebraic_simplifier.cc | 13 ++++--------- tensorflow/compiler/xla/service/hlo_evaluator.cc | 13 +++++++++++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 4858f47c59..dd97f3d876 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -1782,7 +1782,7 @@ static const HloInstruction* NonConstantOperand(const HloInstruction* instr) { // Tries to determine the number of times the given loop executes. Currently // simply returns 0, 1, or "can't tell" (nullopt). -static optional GetLoopTripCount(const HloInstruction* while_op) { +static optional GetLoopTripCount(HloInstruction* while_op) { CHECK_EQ(while_op->opcode(), HloOpcode::kWhile); VLOG(2) << "Getting trip count for loop " << while_op->ToString(); @@ -1803,15 +1803,10 @@ static optional GetLoopTripCount(const HloInstruction* while_op) { // compute how many times the loop executes. Start by computing the induction // variable's initial value. HloEvaluator evaluator; - auto* while_init = while_op->operand(0); - auto* indvar_init = while_init->operand(*indvar_tuple_idx); - // TODO(b/67157142): This should not be redundant, remove this when the - // underlying issue has been addressed. 
- if (!hlo_query::AllOperandsAreConstants(*indvar_init)) { - return nullopt; - } + auto* while_init = while_op->mutable_operand(0); + auto* indvar_init = while_init->mutable_operand(*indvar_tuple_idx); StatusOr> indvar_init_result = - evaluator.Evaluate(indvar_init->Clone().get()); + evaluator.Evaluate(indvar_init); if (!indvar_init_result.ok()) { VLOG(2) << "Couldn't evaluate induction variable init: " << indvar_init_result.status(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 4f9d6c0096..61c59987f5 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1285,8 +1285,17 @@ StatusOr> HloEvaluator::EvaluateWithSubstitutions( operands.push_back(operand.get()); } - return Evaluate( - instruction->CloneWithNewOperands(instruction->shape(), operands).get()); + std::unique_ptr cloned_instruction = + instruction->CloneWithNewOperands(instruction->shape(), operands); + auto result = Evaluate(cloned_instruction.get()); + + // Clean up our cloned instructions before returning. + cloned_instruction->DetachFromOperands(); + for (auto& operand : owned_operands) { + operand->DetachFromOperands(); + } + + return result; } Status HloEvaluator::HandleParameter(HloInstruction* parameter) { -- GitLab From fb3c68db3fd9d1f18f8c5f8d6b005523dfcdf34d Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 6 Oct 2017 17:30:25 -0700 Subject: [PATCH 153/909] Disable keras:models_test in tsan mode. 
PiperOrigin-RevId: 171369892 --- tensorflow/python/keras/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index f1266cdf9e..03bf9d2177 100644 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -654,6 +654,7 @@ py_test( size = "small", srcs = ["_impl/keras/models_test.py"], srcs_version = "PY2AND3", + tags = ["notsan"], # b/67509773 deps = [ ":keras", "//tensorflow/python:client_testlib", -- GitLab From 646db3e3f91cdfcb1d00eb2bd8bc510ce453e7d3 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 6 Oct 2017 18:07:17 -0700 Subject: [PATCH 154/909] eager: Compute num_gpus() correctly. Without this change, if TensorFlow is compiled with support for other devices (such with XLA, which makes XLA_CPU and XLA_GPU devices available), then tfe.num_gpus() was incorrectly overcounting the number of available GPUs. PiperOrigin-RevId: 171373389 --- tensorflow/python/eager/context.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 02ff567e9e..be3d535271 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -95,11 +95,18 @@ class Context(object): device_list = pywrap_tensorflow.TFE_ContextListDevices( self._context_handle, status) try: + self._num_gpus = 0 for i in range(pywrap_tensorflow.TF_DeviceListCount(device_list)): with errors.raise_exception_on_not_ok_status() as status: dev_name = pywrap_tensorflow.TF_DeviceListName( device_list, i, status) self._context_devices.append(pydev.canonical_name(dev_name)) + with errors.raise_exception_on_not_ok_status() as status: + dev_type = pywrap_tensorflow.TF_DeviceListType( + device_list, i, status) + if dev_type == "GPU": + self._num_gpus += 1 + finally: pywrap_tensorflow.TF_DeleteDeviceList(device_list) @@ -238,8 +245,8 @@ class Context(object): def num_gpus(self): """The number of GPUs available to 
execute operations.""" - # TODO(ashankar): Use TF_DeviceListType to count GPU devices. - return len(self._devices) - 1 + self._initialize_handle_and_devices() + return self._num_gpus def add_function_def(self, fdef): """Add a function definition to the context. -- GitLab From 96d276fe4db70a79a9283f35442b5e37dbfd66c6 Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Fri, 6 Oct 2017 18:20:24 -0700 Subject: [PATCH 155/909] Improvements and fixes in VirtualPlacer: - fixed a recent regression where VirtualPlacer stopped placing onto non-default devices like "device:TPU", added a test for this, verified that the test failed without the fix; - fixed a number of problems with uppercase/lowercase mismatch in VirtualPlacer code, before that a slight difference between VirtualCluster device and node device ("/tpu:0" vs "/device:TPU:0") could cause fallback to default device, new code should be more resilient. PiperOrigin-RevId: 171374421 --- .../core/grappler/costs/virtual_placer.cc | 134 +++++++++++------- .../core/grappler/costs/virtual_placer.h | 21 ++- .../grappler/costs/virtual_placer_test.cc | 28 ++++ 3 files changed, 122 insertions(+), 61 deletions(-) diff --git a/tensorflow/core/grappler/costs/virtual_placer.cc b/tensorflow/core/grappler/costs/virtual_placer.cc index 24c45235ff..965a2d2517 100644 --- a/tensorflow/core/grappler/costs/virtual_placer.cc +++ b/tensorflow/core/grappler/costs/virtual_placer.cc @@ -26,18 +26,27 @@ namespace grappler { VirtualPlacer::VirtualPlacer(const Cluster* cluster) { CHECK(cluster); devices_ = cluster->GetDevices(); + lfqn_map_.reserve(devices_.size()); + for (const auto& kv : devices_) { + const auto lfqn = to_lfqn_or_empty(kv.first); + if (lfqn.empty()) { + LOG(ERROR) << "VirtualPlacer couldn't parse device name from cluster: " + << kv.first; + } else { + lfqn_map_[lfqn] = kv.first; + } + } if (devices_.empty()) { // If there are no devices in the cluster, add a single device, "UNKNOWN" to // the cluster. 
- default_device_ = "UNKNOWN"; + default_device_name_ = "UNKNOWN"; DeviceProperties& prop = devices_["UNKNOWN"]; prop.set_type("UNKNOWN"); - } else if (devices_.size() == 1) { // If there is only one device in the cluster, use it as default device, // whatever it is. - default_device_ = devices_.begin()->first; + default_device_name_ = devices_.begin()->first; } else { // Default device is set from the devices in the cluster in the following // priority: /gpu:0, /cpu:0, or any device. @@ -46,41 +55,48 @@ VirtualPlacer::VirtualPlacer(const Cluster* cluster) { // other than CPU and GPU. std::map cpu_devices; // CPU device map: id -> device name. std::map gpu_devices; // GPU device map: id -> device name. - for (const auto& device : devices_) { + for (const auto& kv : lfqn_map_) { + const auto& lfqn = kv.first; + const auto& cluster_device_name = kv.second; DeviceNameUtils::ParsedName parsed_name; - bool parsed = DeviceNameUtils::ParseFullName(device.first, &parsed_name); + bool parsed = DeviceNameUtils::ParseFullName(lfqn, &parsed_name); if (parsed) { // Parsed devices are stored to cpu_devices or gpu_devices map, - // addressed (and orderd) by device id. - if (str_util::Lowercase(parsed_name.type) == "gpu") { - gpu_devices[parsed_name.id] = device.first; - } else if (str_util::Lowercase(parsed_name.type) == "cpu") { - cpu_devices[parsed_name.id] = device.first; + // addressed (and ordered) by device id. + const auto type = str_util::Lowercase(parsed_name.type); + if (type == "gpu") { + gpu_devices[parsed_name.id] = cluster_device_name; + } else if (type == "cpu") { + cpu_devices[parsed_name.id] = cluster_device_name; } } } + if (!gpu_devices.empty()) { // GPU:0 (or GPU with smallest device id). - default_device_ = gpu_devices.begin()->second; + default_device_name_ = gpu_devices.begin()->second; } else if (!cpu_devices.empty()) { // CPU:0 (or CPU with smallest device id). 
- default_device_ = cpu_devices.begin()->second; + default_device_name_ = cpu_devices.begin()->second; } else { - default_device_ = devices_.begin()->first; // Any device. + default_device_name_ = devices_.begin()->first; // Any device. } } // Default job name for canonical device name. - default_job_name_ = "localhost"; + default_job_name_lowercase_ = "localhost"; // Scan the device names from the cluster, and if there is one job name used, // use it for canonical device name. std::unordered_set job_names_from_cluster; - for (const auto& device : devices_) { - const auto& device_name = device.first; + for (const auto& device : lfqn_map_) { + const auto& lfqn = device.first; DeviceNameUtils::ParsedName parsed_name; - bool parsed = DeviceNameUtils::ParseFullName(device_name, &parsed_name); + bool parsed = DeviceNameUtils::ParseFullName(lfqn, &parsed_name); if (parsed && !parsed_name.job.empty()) { job_names_from_cluster.insert(parsed_name.job); + if (job_names_from_cluster.size() > 1) { + break; + } } } // If there is only type of job name in all the devices in the cluster, use @@ -89,60 +105,68 @@ VirtualPlacer::VirtualPlacer(const Cluster* cluster) { // composed of multiple worker, PS, and other types of jobs. 
if (job_names_from_cluster.size() == 1) { auto it = job_names_from_cluster.begin(); - default_job_name_ = *it; + default_job_name_lowercase_ = *it; } } const DeviceProperties& VirtualPlacer::get_device(const NodeDef& node) const { string device = get_canonical_device_name(node); - VLOG(3) << "Device name: " << device; + VLOG(3) << "node.name=" << node.name() << " node.device=" << node.device() + << " is placed on: " << device; auto it = devices_.find(device); DCHECK(it != devices_.end()); return it->second; } string VirtualPlacer::get_canonical_device_name(const NodeDef& node) const { - string device; - if (!node.device().empty()) { - if (devices_.find(node.device()) != devices_.end()) { - return node.device(); - } - DeviceNameUtils::ParsedName parsed_name; - bool parsed = DeviceNameUtils::ParseFullName(node.device(), &parsed_name); - if (!parsed) { - parsed = DeviceNameUtils::ParseLocalName(node.device(), &parsed_name); + if (node.device().empty()) { + return default_device_name_; + } + + const auto lfqn = to_lfqn_or_empty(node.device()); + if (lfqn.empty()) { + return default_device_name_; + } + + const auto it = lfqn_map_.find(lfqn); + if (it != lfqn_map_.end()) { + return it->second; + } + + return default_device_name_; +} + +string VirtualPlacer::to_lfqn_or_empty(const string& device_name) const { + DeviceNameUtils::ParsedName parsed_name; + const auto lowercase_name = str_util::Lowercase(device_name); + bool parsed = DeviceNameUtils::ParseFullName(lowercase_name, &parsed_name); + if (!parsed) { + parsed = DeviceNameUtils::ParseLocalName(lowercase_name, &parsed_name); + parsed_name.job = "localhost"; + } + if (!parsed) { + if (lowercase_name == "gpu" || lowercase_name == "cpu") { parsed_name.job = "localhost"; + parsed_name.type = lowercase_name; + parsed = true; } - if (!parsed) { - if (node.device() == "GPU" || node.device() == "CPU" || - node.device() == "gpu" || node.device() == "cpu") { - parsed_name.job = "localhost"; - parsed_name.type = node.device(); 
- parsed = true; - } - } - if (!parsed) { - return get_default_device_name(); - } else { - if (parsed_name.job.empty()) { - parsed_name.job = default_job_name_; - } - device = strings::StrCat( - "/job:", parsed_name.job, "/replica:", parsed_name.replica, - "/task:", parsed_name.task, "/", - str_util::Lowercase(parsed_name.type), ":", parsed_name.id); - } - } else { - return get_default_device_name(); } - if (devices_.find(device) == devices_.end()) { - return get_default_device_name(); + if (!parsed) { + return {}; } - return device; -} -const string& VirtualPlacer::get_default_device_name() const { - return default_device_; + if (parsed_name.job.empty()) { + parsed_name.job = default_job_name_lowercase_; + } + + // Have to do this, because parser returns uppercase types for CPU and GPU. + parsed_name.type = str_util::Lowercase(parsed_name.type); + + string lfqn = strings::StrCat( + "/job:", parsed_name.job, "/replica:", parsed_name.replica, + "/task:", parsed_name.task, "/device:", parsed_name.type, ":", + parsed_name.id); + return lfqn; } } // end namespace grappler diff --git a/tensorflow/core/grappler/costs/virtual_placer.h b/tensorflow/core/grappler/costs/virtual_placer.h index 75ee496329..7ccb1ebb99 100644 --- a/tensorflow/core/grappler/costs/virtual_placer.h +++ b/tensorflow/core/grappler/costs/virtual_placer.h @@ -33,16 +33,25 @@ class VirtualPlacer { const DeviceProperties& get_device(const NodeDef& node) const; - // Returns canonical device name that has a corresponding device in the - // cluster; returns empty string if no device found or the node.device() can - // not be parsed. + // Returns device name from cluster, which best matches the node.device() + // specification. Returns default device if no match was found or the + // node.device() could not be parsed. string get_canonical_device_name(const NodeDef& node) const; private: + // Converts given device name to Lowercase Fully-Qualified Name (LFQN) string. 
+ // This helps us disambiguate device names internally and simplify matching. + // If device_name couldn't be parsed successfully, returns empty string. + string to_lfqn_or_empty(const string& device_name) const; + + // Map based on the cluster info: cluster device name -> device properties. std::unordered_map devices_; - string default_device_; - string default_job_name_; - const string& get_default_device_name() const; + + // Maps LFQN to original device name as it was declared in cluster. + std::unordered_map lfqn_map_; + + string default_device_name_; + string default_job_name_lowercase_; }; } // namespace grappler diff --git a/tensorflow/core/grappler/costs/virtual_placer_test.cc b/tensorflow/core/grappler/costs/virtual_placer_test.cc index 3a0510c44a..1c2e2815a6 100644 --- a/tensorflow/core/grappler/costs/virtual_placer_test.cc +++ b/tensorflow/core/grappler/costs/virtual_placer_test.cc @@ -53,6 +53,34 @@ TEST(VirtualPlacerTest, LocalDevices) { placer.get_canonical_device_name(node)); } +TEST(VirtualPlacerTest, PlacementOnNonDefaultDevice) { + // Create a virtual cluster with a CPU and a device:TPU + // Test that placement on TPU works + // In contrast with GPU, TPU is not selected as default device at the moment. + + std::unordered_map devices; + DeviceProperties cpu_device; + cpu_device.set_type("CPU"); + devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device; + DeviceProperties tpu_device; + tpu_device.set_type("TPU"); + devices["/job:localhost/replica:0/task:0/device:TPU:0"] = tpu_device; + VirtualCluster cluster(devices); + VirtualPlacer placer(&cluster); + + NodeDef node; + node.set_op("Conv2D"); + // node.device() is empty, and CPU is default device.
+ EXPECT_EQ("CPU", placer.get_device(node).type()); + EXPECT_EQ("/job:localhost/replica:0/task:0/cpu:0", + placer.get_canonical_device_name(node)); + + node.set_device("/device:TPU:0"); + EXPECT_EQ("TPU", placer.get_device(node).type()); + EXPECT_EQ("/job:localhost/replica:0/task:0/device:TPU:0", + placer.get_canonical_device_name(node)); +} + TEST(VirtualPlacerTest, EmptyJobName) { // Virtual placer choose job name from the devices in cluster if a device name // of an op is empty. In case there are more than one kind of job name -- GitLab From 010dd39b949a57f80122ea7fdca8a0937f6fbb65 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 6 Oct 2017 18:24:03 -0700 Subject: [PATCH 156/909] Disable predict_test under tsan. PiperOrigin-RevId: 171374722 --- tensorflow/contrib/timeseries/examples/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD index 015d0eba29..8ed812f9d1 100644 --- a/tensorflow/contrib/timeseries/examples/BUILD +++ b/tensorflow/contrib/timeseries/examples/BUILD @@ -25,6 +25,7 @@ py_test( srcs = ["predict_test.py"], data = ["data/period_trend.csv"], srcs_version = "PY2AND3", + tags = ["notsan"], # b/67513579 deps = [ ":predict", "//tensorflow/python:client_testlib", -- GitLab From 5a107a9a278e98f2fcb77c8ac6c224d40c06e8c2 Mon Sep 17 00:00:00 2001 From: Neal Wu Date: Fri, 6 Oct 2017 18:33:41 -0700 Subject: [PATCH 157/909] Fix broken docs links to other TensorFlow interfaces in tf.contrib.learn.Experiment PiperOrigin-RevId: 171375351 --- tensorflow/contrib/learn/python/learn/experiment.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index 9b55826e62..307db76afe 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -149,16 +149,16 @@ class 
Experiment(object): Args: estimator: Object implementing Estimator interface, which could be a - combination of ${tf.contrib.learn.Trainable} and - ${tf.contrib.learn.Evaluable} (deprecated), or - ${tf.estimator.`Estimator}. + combination of @{tf.contrib.learn.Trainable} and + @{tf.contrib.learn.Evaluable} (deprecated), or + @{tf.estimator.Estimator}. train_input_fn: function, returns features and labels for training. eval_input_fn: function, returns features and labels for evaluation. If `eval_steps` is `None`, this should be configured only to produce for a finite number of batches (generally, 1 epoch over the evaluation data). eval_metrics: `dict` of string, metric function. If `None`, default set is used. This should be `None` if the `estimator` is - ${tf.estimator.Estimator}. If metrics are provided they will be + @{tf.estimator.Estimator}. If metrics are provided they will be *appended* to the default set. train_steps: Perform this many steps of training. `None`, the default, means train forever. -- GitLab From 394e5601c13da603237063d436d87867727ecf68 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Fri, 6 Oct 2017 18:34:17 -0700 Subject: [PATCH 158/909] Add a custom estimator example to the regression cookbook. 
PiperOrigin-RevId: 171375399 --- .../docs_src/get_started/linear_regression.md | 27 +++ .../examples/get_started/regression/BUILD | 1 + .../regression/custom_regression.py | 163 ++++++++++++++++++ .../get_started/regression/imports85.py | 6 +- .../examples/get_started/regression/test.py | 7 + 5 files changed, 201 insertions(+), 3 deletions(-) create mode 100644 tensorflow/examples/get_started/regression/custom_regression.py diff --git a/tensorflow/docs_src/get_started/linear_regression.md b/tensorflow/docs_src/get_started/linear_regression.md index b12bbd770f..7cfff8db15 100644 --- a/tensorflow/docs_src/get_started/linear_regression.md +++ b/tensorflow/docs_src/get_started/linear_regression.md @@ -27,6 +27,13 @@ to implement regression in Estimators: regression model on discrete data with a deep neural network. + + custom_regression.py + [imports85](https://archive.ics.uci.edu/ml/datasets/automobile) + Use @{tf.estimator.Estimator} to train a customized dnn + regression model. + + The preceding examples rely on the following data set utility: @@ -207,3 +214,23 @@ in a deep neural network. After printing loss values, the program outputs the Mean Square Error on a test set. + + + +## custom_regression.py + +The `custom_regression.py` example also trains a model that predicts the price +of a car based on mixed real-valued and categorical input features, described by +feature_columns. Unlike `linear_regression_categorical.py` and +`dnn_regression.py`, this example does not use a pre-made estimator, but defines +a custom model using the base @{tf.estimator.Estimator$`Estimator`} class. The +custom model is quite similar to the model defined by `dnn_regression.py`. + +The custom model is defined by the `model_fn` argument to the constructor. The +customization is made more reusable through the `params` dictionary, which is later +passed through to the `model_fn` when the `model_fn` is called.
+ +The `model_fn` returns an +@{tf.estimator.EstimatorSpec$`EstimatorSpec`} which is a simple structure +indicating to the `Estimator` which operations should be run to accomplish +various tasks. diff --git a/tensorflow/examples/get_started/regression/BUILD b/tensorflow/examples/get_started/regression/BUILD index 334c8096c1..577b970c90 100644 --- a/tensorflow/examples/get_started/regression/BUILD +++ b/tensorflow/examples/get_started/regression/BUILD @@ -18,6 +18,7 @@ py_test( name = "test", size = "medium", srcs = [ + "custom_regression.py", "dnn_regression.py", "imports85.py", "linear_regression.py", diff --git a/tensorflow/examples/get_started/regression/custom_regression.py b/tensorflow/examples/get_started/regression/custom_regression.py new file mode 100644 index 0000000000..2e34362c5c --- /dev/null +++ b/tensorflow/examples/get_started/regression/custom_regression.py @@ -0,0 +1,163 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# ============================================================================== +"""Regression using a custom Estimator built from a DNN `model_fn`.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +import imports85 # pylint: disable=g-bad-import-order + +STEPS = 1000 +PRICE_NORM_FACTOR = 1000 + + +def my_dnn_regression_fn(features, labels, mode, params): + """A model function implementing DNN regression for a custom Estimator.""" + + # Extract the input into a dense layer, according to the feature_columns. + top = tf.feature_column.input_layer(features, params["feature_columns"]) + + # Iterate over the "hidden_units" list of layer sizes, default is [20]. + for units in params.get("hidden_units", [20]): + # Add a hidden layer, densely connected on top of the previous layer. + top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.relu) + + # Connect a linear output layer on top. + output_layer = tf.layers.dense(inputs=top, units=1) + + # Reshape the output layer to a 1-dim Tensor to return predictions + predictions = tf.squeeze(output_layer, 1) + + if mode == tf.estimator.ModeKeys.PREDICT: + # In `PREDICT` mode we only need to return predictions. + return tf.estimator.EstimatorSpec( + mode=mode, predictions={"price": predictions}) + + # Calculate loss using mean squared error + average_loss = tf.losses.mean_squared_error(labels, predictions) + + # Pre-made estimators use the total_loss instead of the average, + # so report total_loss for compatibility.
+ batch_size = tf.shape(labels)[0] + total_loss = tf.to_float(batch_size) * average_loss + + if mode == tf.estimator.ModeKeys.TRAIN: + optimizer = params.get("optimizer", tf.train.AdamOptimizer) + optimizer = optimizer(params.get("learning_rate", None)) + train_op = optimizer.minimize( + loss=average_loss, global_step=tf.train.get_global_step()) + + return tf.estimator.EstimatorSpec( + mode=mode, loss=total_loss, train_op=train_op) + + # In evaluation mode we will calculate evaluation metrics. + assert mode == tf.estimator.ModeKeys.EVAL + + # Calculate root mean squared error + rmse = tf.metrics.root_mean_squared_error(labels, predictions) + + # Add the rmse to the collection of evaluation metrics. + eval_metrics = {"rmse": rmse} + + return tf.estimator.EstimatorSpec( + mode=mode, + # Report sum of error for compatibility with pre-made estimators + loss=total_loss, + eval_metric_ops=eval_metrics) + + +def main(argv): + """Builds, trains, and evaluates the model.""" + assert len(argv) == 1 + (train, test) = imports85.dataset() + + # Switch the labels to units of thousands for better convergence. + def normalize_price(features, labels): + return features, labels / PRICE_NORM_FACTOR + + train = train.map(normalize_price) + test = test.map(normalize_price) + + # Build the training input_fn. + def input_train(): + return ( + # Shuffling with a buffer larger than the data set ensures + # that the examples are well mixed. + train.shuffle(1000).batch(128) + # Repeat forever + .repeat().make_one_shot_iterator().get_next()) + + # Build the validation input_fn. + def input_test(): + return (test.shuffle(1000).batch(128) + .make_one_shot_iterator().get_next()) + + # The first way assigns a unique weight to each category. To do this you must + # specify the category's vocabulary (values outside this specification will + # receive a weight of zero). Here we specify the vocabulary using a list of + # options. 
The vocabulary can also be specified with a vocabulary file (using + # `categorical_column_with_vocabulary_file`). For features covering a + # range of positive integers use `categorical_column_with_identity`. + body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"] + body_style = tf.feature_column.categorical_column_with_vocabulary_list( + key="body-style", vocabulary_list=body_style_vocab) + make = tf.feature_column.categorical_column_with_hash_bucket( + key="make", hash_bucket_size=50) + + feature_columns = [ + tf.feature_column.numeric_column(key="curb-weight"), + tf.feature_column.numeric_column(key="highway-mpg"), + # Since this is a DNN model, convert categorical columns from sparse + # to dense. + # Wrap them in an `indicator_column` to create a + # one-hot vector from the input. + tf.feature_column.indicator_column(body_style), + # Or use an `embedding_column` to create a trainable vector for each + # index. + tf.feature_column.embedding_column(make, dimension=3), + ] + + # Build a custom Estimator, using the model_fn. + # `params` is passed through to the `model_fn`. + model = tf.estimator.Estimator( + model_fn=my_dnn_regression_fn, + params={ + "feature_columns": feature_columns, + "learning_rate": 0.001, + "optimizer": tf.train.AdamOptimizer, + "hidden_units": [20, 20] + }) + + # Train the model. + model.train(input_fn=input_train, steps=STEPS) + + # Evaluate how the model performs on data it has not yet seen. + eval_result = model.evaluate(input_fn=input_test) + + # Print the Root Mean Square Error (RMSE). + print("\n" + 80 * "*") + print("\nRMS error for the test set: ${:.0f}" + .format(PRICE_NORM_FACTOR * eval_result["rmse"])) + + print() + + +if __name__ == "__main__": + # The Estimator periodically generates "INFO" logs; make these logs visible. 
+ tf.logging.set_verbosity(tf.logging.INFO) + tf.app.run(main=main) diff --git a/tensorflow/examples/get_started/regression/imports85.py b/tensorflow/examples/get_started/regression/imports85.py index c165f0175d..96a464920a 100644 --- a/tensorflow/examples/get_started/regression/imports85.py +++ b/tensorflow/examples/get_started/regression/imports85.py @@ -140,10 +140,10 @@ def dataset(y_name="price", train_fraction=0.7): train = (base_dataset # Take only the training-set lines. .filter(in_training_set) - # Cache data so you only read the file once. - .cache() # Decode each line into a (features_dict, label) pair. - .map(decode_line)) + .map(decode_line) + # Cache data so you only decode the file once. + .cache()) # Do the same for the test-set. test = (base_dataset.filter(in_test_set).cache().map(decode_line)) diff --git a/tensorflow/examples/get_started/regression/test.py b/tensorflow/examples/get_started/regression/test.py index fa06dde9ae..652b44f543 100644 --- a/tensorflow/examples/get_started/regression/test.py +++ b/tensorflow/examples/get_started/regression/test.py @@ -34,6 +34,7 @@ import tensorflow.contrib.data as data import tensorflow.examples.get_started.regression.dnn_regression as dnn_regression import tensorflow.examples.get_started.regression.linear_regression as linear_regression import tensorflow.examples.get_started.regression.linear_regression_categorical as linear_regression_categorical +import tensorflow.examples.get_started.regression.custom_regression as custom_regression from tensorflow.python.platform import googletest from tensorflow.python.platform import test @@ -86,6 +87,12 @@ class RegressionTest(googletest.TestCase): def test_dnn_regression(self): dnn_regression.main([""]) + @test.mock.patch.dict(data.__dict__, {"TextLineDataset": four_lines_dataset}) + @test.mock.patch.dict(imports85.__dict__, {"_get_imports85": (lambda: None)}) + @test.mock.patch.dict(custom_regression.__dict__, {"STEPS": 1}) + def test_custom_regression(self): + 
custom_regression.main([""]) + if __name__ == "__main__": googletest.main() -- GitLab From f8f1ccefb6afc9de0b07e8c1392ecf2abe3391e4 Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Fri, 6 Oct 2017 19:32:10 -0700 Subject: [PATCH 159/909] Log in executor when a synchronous node is finished. Also log more info when an asynchronous node is finished. This is useful for debugging deadlocks and issues where a kernel does not return. PiperOrigin-RevId: 171379066 --- tensorflow/core/common_runtime/executor.cc | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 11e063d8d2..ada29ff287 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -1617,14 +1617,17 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { NodeExecStatsWrapper* stats = state->stats; // Shorthand Entry* first_input = state->first_input; // Shorthand - if (vlog_) { - VLOG(2) << this << " Async kernel done: " - << SummarizeNode(*state->item->node); - } nodestats::SetOpEnd(stats); EntryVector outputs; Status s = ProcessOutputs(*state->item, &state->ctx, &outputs, stats); nodestats::SetMemory(stats, &state->ctx); + if (vlog_) { + VLOG(2) << "Async kernel done: " << state->item->node->id() + << " step " << step_id_ << " " + << SummarizeNode(*state->item->node) + << " is dead: " << state->tagged_node.is_dead; + } + // Clears inputs. const int num_inputs = state->item->num_inputs; for (int i = 0; i < num_inputs; ++i) { @@ -1672,6 +1675,12 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { } if (!launched_asynchronously) { + if (vlog_) { + VLOG(2) << "Synchronous kernel done: " << id << " step " + << params.step_id << " " << SummarizeNode(*node) + << " is dead: " << tagged_node.is_dead; + } + // Clears inputs. 
const int num_inputs = item.num_inputs; for (int i = 0; i < num_inputs; ++i) { -- GitLab From 843394627a43fd48b2cf77cb434948122e75858b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 6 Oct 2017 21:01:02 -0700 Subject: [PATCH 160/909] Make name scopes consistent. PiperOrigin-RevId: 171382508 --- .../gan/python/losses/python/losses_impl.py | 80 +++++++++++-------- .../python/losses/python/losses_impl_test.py | 6 +- 2 files changed, 49 insertions(+), 37 deletions(-) diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl.py b/tensorflow/contrib/gan/python/losses/python/losses_impl.py index 2a40dbade6..b4a74fc49c 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl.py @@ -217,21 +217,25 @@ def acgan_discriminator_loss( Raises: TypeError: If the discriminator does not output a tuple. """ - loss_on_generated = losses.softmax_cross_entropy( - one_hot_labels, discriminator_gen_classification_logits, - weights=generated_weights, scope=scope, loss_collection=None, - reduction=reduction) - loss_on_real = losses.softmax_cross_entropy( - one_hot_labels, discriminator_real_classification_logits, - weights=real_weights, label_smoothing=label_smoothing, scope=scope, - loss_collection=None, reduction=reduction) - loss = loss_on_generated + loss_on_real - util.add_loss(loss, loss_collection) + with ops.name_scope( + scope, 'acgan_discriminator_loss', + (discriminator_real_classification_logits, + discriminator_gen_classification_logits, one_hot_labels)) as scope: + loss_on_generated = losses.softmax_cross_entropy( + one_hot_labels, discriminator_gen_classification_logits, + weights=generated_weights, scope=scope, loss_collection=None, + reduction=reduction) + loss_on_real = losses.softmax_cross_entropy( + one_hot_labels, discriminator_real_classification_logits, + weights=real_weights, label_smoothing=label_smoothing, scope=scope, + loss_collection=None, 
reduction=reduction) + loss = loss_on_generated + loss_on_real + util.add_loss(loss, loss_collection) - if add_summaries: - summary.scalar('discriminator_gen_ac_loss', loss_on_generated) - summary.scalar('discriminator_real_ac_loss', loss_on_real) - summary.scalar('discriminator_ac_loss', loss) + if add_summaries: + summary.scalar('discriminator_gen_ac_loss', loss_on_generated) + summary.scalar('discriminator_real_ac_loss', loss_on_real) + summary.scalar('discriminator_ac_loss', loss) return loss @@ -275,12 +279,16 @@ def acgan_generator_loss( ValueError: if arg module not either `generator` or `discriminator` TypeError: if the discriminator does not output a tuple. """ - loss = losses.softmax_cross_entropy( - one_hot_labels, discriminator_gen_classification_logits, weights=weights, - scope=scope, loss_collection=loss_collection, reduction=reduction) + with ops.name_scope( + scope, 'acgan_generator_loss', + (discriminator_gen_classification_logits, one_hot_labels)) as scope: + loss = losses.softmax_cross_entropy( + one_hot_labels, discriminator_gen_classification_logits, + weights=weights, scope=scope, loss_collection=loss_collection, + reduction=reduction) - if add_summaries: - summary.scalar('generator_ac_loss', loss) + if add_summaries: + summary.scalar('generator_ac_loss', loss) return loss @@ -546,7 +554,7 @@ def modified_generator_loss( discriminator_gen_outputs, label_smoothing=0.0, weights=1.0, - scope='generator_modified_loss', + scope=None, loss_collection=ops.GraphKeys.LOSSES, reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS, add_summaries=False): @@ -576,12 +584,15 @@ def modified_generator_loss( Returns: A loss Tensor. The shape depends on `reduction`. 
""" - loss = losses.sigmoid_cross_entropy( - array_ops.ones_like(discriminator_gen_outputs), discriminator_gen_outputs, - weights, label_smoothing, scope, loss_collection, reduction) + with ops.name_scope(scope, 'generator_modified_loss', + [discriminator_gen_outputs]) as scope: + loss = losses.sigmoid_cross_entropy( + array_ops.ones_like(discriminator_gen_outputs), + discriminator_gen_outputs, weights, label_smoothing, scope, + loss_collection, reduction) - if add_summaries: - summary.scalar('generator_modified_loss', loss) + if add_summaries: + summary.scalar('generator_modified_loss', loss) return loss @@ -739,7 +750,7 @@ def mutual_information_penalty( structured_generator_inputs, predicted_distributions, weights=1.0, - scope='generator_modified_loss', + scope=None, loss_collection=ops.GraphKeys.LOSSES, reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS, add_summaries=False): @@ -767,15 +778,16 @@ def mutual_information_penalty( _validate_information_penalty_inputs( structured_generator_inputs, predicted_distributions) - # Calculate the negative log-likelihood of the reconstructed noise. - log_probs = [math_ops.reduce_mean(dist.log_prob(noise)) for dist, noise in - zip(predicted_distributions, structured_generator_inputs)] - loss = -1 * losses.compute_weighted_loss( - log_probs, weights, scope, loss_collection=loss_collection, - reduction=reduction) + with ops.name_scope(scope, 'mutual_information_loss') as scope: + # Calculate the negative log-likelihood of the reconstructed noise. 
+ log_probs = [math_ops.reduce_mean(dist.log_prob(noise)) for dist, noise in + zip(predicted_distributions, structured_generator_inputs)] + loss = -1 * losses.compute_weighted_loss( + log_probs, weights, scope, loss_collection=loss_collection, + reduction=reduction) - if add_summaries: - summary.scalar('mutual_information_penalty', loss) + if add_summaries: + summary.scalar('mutual_information_penalty', loss) return loss diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py index 3e003dd0f8..c15ce5baae 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py @@ -274,8 +274,8 @@ class ACGANLossTest(test.TestCase): self._discriminator_real_classification_logits, 'one_hot_labels': self._one_hot_labels, } - self._generator_loss_name = 'softmax_cross_entropy_loss/value' - self._discriminator_loss_name = 'add' + self._generator_loss_name = 'acgan_generator_loss/value' + self._discriminator_loss_name = 'acgan_discriminator_loss/add' self._expected_g_loss = 3.84974 self._expected_d_loss = 9.43950 @@ -504,7 +504,7 @@ class MutualInformationPenaltyTest(test.TestCase, _PenaltyTest): 'predicted_distributions': self._predicted_distributions, } self._expected_loss = 1.61610 - self._expected_op_name = 'mul' + self._expected_op_name = 'mutual_information_loss/mul' self._batch_size = 2 -- GitLab From d43911058b63c7e91fac01b8b18bffa4cd936868 Mon Sep 17 00:00:00 2001 From: Taehoon Lee Date: Sat, 7 Oct 2017 15:04:58 +0900 Subject: [PATCH 161/909] Fix typos --- tensorflow/contrib/meta_graph_transform/meta_graph_transform.py | 2 +- tensorflow/core/framework/rendezvous.cc | 2 +- tensorflow/core/profiler/g3doc/options.md | 2 +- tensorflow/examples/get_started/regression/imports85.py | 2 +- tensorflow/python/debug/cli/tensor_format.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py index 303c02dfa4..2932ae1c8d 100644 --- a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py +++ b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py @@ -749,7 +749,7 @@ def meta_graph_transform( base_meta_graph_def, meta_graph_def, collection_name, removed_op_names) - # Append newly added initalizers to collection. + # Append newly added initializers to collection. _add_new_inits_to_collection(meta_graph_def, updated_initializer_names) # Copy signature_defs, excluding any pruned nodes diff --git a/tensorflow/core/framework/rendezvous.cc b/tensorflow/core/framework/rendezvous.cc index 90426defa0..a9e4c1cfb1 100644 --- a/tensorflow/core/framework/rendezvous.cc +++ b/tensorflow/core/framework/rendezvous.cc @@ -210,7 +210,7 @@ class LocalRendezvousImpl : public Rendezvous { ItemQueue* queue = &table_[key_hash]; if (queue->empty() || !queue->front()->IsSendValue()) { // There is no message to pick up. - // Only recv-related fileds need to be filled. + // Only recv-related fields need to be filled. Item* item = new Item; item->waiter = std::move(done); item->recv_args = recv_args; diff --git a/tensorflow/core/profiler/g3doc/options.md b/tensorflow/core/profiler/g3doc/options.md index ddee63ad42..4c73e372e3 100644 --- a/tensorflow/core/profiler/g3doc/options.md +++ b/tensorflow/core/profiler/g3doc/options.md @@ -43,7 +43,7 @@ In graph view, in means the number of hops in the graph. ### Times -Most machines have mutli-core CPUs. Some installs one or more accelerators. +Most machines have multi-core CPUs. Some installs one or more accelerators. Each accelerator usually performs massive parallel processing. The profiler tracks the accumulated processing times. Hence, the accumulated processing time is likely larger than the time of each step. 
diff --git a/tensorflow/examples/get_started/regression/imports85.py b/tensorflow/examples/get_started/regression/imports85.py index c165f0175d..56d19f0d0a 100644 --- a/tensorflow/examples/get_started/regression/imports85.py +++ b/tensorflow/examples/get_started/regression/imports85.py @@ -127,7 +127,7 @@ def dataset(y_name="price", train_fraction=0.7): def in_test_set(line): """Returns a boolean tensor, true if the line is in the training set.""" # Items not in the training set are in the test set. - # This line must use `~` instead of `not` beacuse `not` only works on python + # This line must use `~` instead of `not` because `not` only works on python # booleans but we are dealing with symbolic tensors. return ~in_training_set(line) diff --git a/tensorflow/python/debug/cli/tensor_format.py b/tensorflow/python/debug/cli/tensor_format.py index 7a5597db12..05ccf93f15 100644 --- a/tensorflow/python/debug/cli/tensor_format.py +++ b/tensorflow/python/debug/cli/tensor_format.py @@ -480,7 +480,7 @@ def _pad_string_to_length(string, length): def numeric_summary(tensor): - """Get a text summmary of a numeric tensor. + """Get a text summary of a numeric tensor. This summary is only available for numeric (int*, float*, complex*) and Boolean tensors. 
-- GitLab From f59ef8a3e5c79ed97813b136d900ade31c0c11a7 Mon Sep 17 00:00:00 2001 From: Hanmin Qin Date: Sat, 7 Oct 2017 15:04:59 +0800 Subject: [PATCH 162/909] small typo --- .../get_started/regression/linear_regression_categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/get_started/regression/linear_regression_categorical.py b/tensorflow/examples/get_started/regression/linear_regression_categorical.py index 860d0e437c..e2ad415fbc 100644 --- a/tensorflow/examples/get_started/regression/linear_regression_categorical.py +++ b/tensorflow/examples/get_started/regression/linear_regression_categorical.py @@ -67,7 +67,7 @@ def main(argv): # The second way, appropriate for an unspecified vocabulary, is to create a # hashed column. It will create a fixed length list of weights, and - # automatically assign each input categort to a weight. Due to the + # automatically assign each input category to a weight. Due to the # pseudo-randomness of the process, some weights may be shared between # categories, while others will remain unused. make_column = tf.feature_column.categorical_column_with_hash_bucket( -- GitLab From 188297f80e0341f2480071c85a671c6c0abdbf8e Mon Sep 17 00:00:00 2001 From: "Yuan (Terry) Tang" Date: Sat, 7 Oct 2017 11:08:19 -0400 Subject: [PATCH 163/909] Added missing `` in train_and_evaluate doc --- tensorflow/python/estimator/training.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 64b014a6b5..45bff233ea 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -408,8 +408,8 @@ def train_and_evaluate(estimator, train_spec, eval_spec): Args: estimator: An `Estimator` instance to train and evaluate. - train_spec: A `TrainSpec instance to specify the training specification. 
- eval_spec: A `EvalSpec instance to specify the evaluation and export + train_spec: A `TrainSpec` instance to specify the training specification. + eval_spec: A `EvalSpec` instance to specify the evaluation and export specification. Raises: -- GitLab From e81fbdf719f39d82afb5c6e27c99cd006fb5f689 Mon Sep 17 00:00:00 2001 From: Armen Donigian Date: Sat, 7 Oct 2017 09:38:14 -0700 Subject: [PATCH 164/909] This branch updates the installation instructions for conda install to include pip as well, in order to prevent the usage of the pip installed in the root conda environment. --- tensorflow/docs_src/install/install_linux.md | 2 +- tensorflow/docs_src/install/install_mac.md | 2 +- tensorflow/docs_src/install/install_windows.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 14cc1f733c..2b488cc4f5 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -457,7 +457,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: 2. Create a conda environment named tensorflow to run a version of Python by invoking the following command: -
$ conda create -n tensorflow python=2.7 # or python=3.3, etc.
+
$ conda create -n tensorflow pip python=2.7 # or python=3.3, etc.
3. Activate the conda environment by issuing the following command: diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index b6daeb0dd6..efd977089b 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -321,7 +321,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: 2. Create a conda environment named `tensorflow` by invoking the following command: -
$ conda create -n tensorflow python=2.7 # or python=3.3, etc.
+
$ conda create -n tensorflow pip python=2.7 # or python=3.3, etc.
3. Activate the conda environment by issuing the following command: diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index ae8749c231..f0d580d803 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -105,7 +105,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: 2. Create a conda environment named tensorflow by invoking the following command: -
C:\> conda create -n tensorflow python=3.5 
+
C:\> conda create -n tensorflow pip python=3.5 
3. Activate the conda environment by issuing the following command: -- GitLab From 0652d7aced72f795c494cd371d9e6aa8e082d0c8 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 7 Oct 2017 17:07:40 +0000 Subject: [PATCH 165/909] Fix broken link in performance guide This fix fixes broken link in performance guide as models repo moved slim to `models/research/slim` `https://github.com/tensorflow/models/tree/master/slim#Data` -> `https://github.com/tensorflow/models/tree/master/research/slim#Data` Signed-off-by: Yong Tang --- tensorflow/docs_src/performance/performance_guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/performance/performance_guide.md b/tensorflow/docs_src/performance/performance_guide.md index 30fb91f9d9..d3aa901bec 100644 --- a/tensorflow/docs_src/performance/performance_guide.md +++ b/tensorflow/docs_src/performance/performance_guide.md @@ -93,7 +93,7 @@ Reading large numbers of small files significantly impacts I/O performance. One approach to get maximum I/O throughput is to preprocess input data into larger (~100MB) `TFRecord` files. For smaller data sets (200MB-1GB), the best approach is often to load the entire data set into memory. The document -[Downloading and converting to TFRecord format](https://github.com/tensorflow/models/tree/master/slim#Data) +[Downloading and converting to TFRecord format](https://github.com/tensorflow/models/tree/master/research/slim#Data) includes information and scripts for creating `TFRecords` and this [script](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10_estimator/generate_cifar10_tfrecords.py) converts the CIFAR-10 data set into `TFRecords`. 
-- GitLab From b3a286301beb68d6809f892b7f252204eb02b880 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 7 Oct 2017 17:12:43 +0000 Subject: [PATCH 166/909] Fix broken link in performance models This fix fixes broken link in performance models as models repo moved inception to `models/research/inception`: `https://github.com/tensorflow/models/tree/master/inception#getting-started` -> `https://github.com/tensorflow/models/tree/master/research/inception#getting-started` Signed-off-by: Yong Tang --- tensorflow/docs_src/performance/performance_models.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/performance/performance_models.md b/tensorflow/docs_src/performance/performance_models.md index 183bbc75a9..fcda19e74c 100644 --- a/tensorflow/docs_src/performance/performance_models.md +++ b/tensorflow/docs_src/performance/performance_models.md @@ -345,7 +345,7 @@ executing the main script * **`num_gpus`**: Number of GPUs to use. * **`data_dir`**: Path to data to process. If not set, synthetic data is used. To use Imagenet data use these - [instructions](https://github.com/tensorflow/models/tree/master/inception#getting-started) + [instructions](https://github.com/tensorflow/models/tree/master/research/inception#getting-started) as a starting point. * **`batch_size`**: Batch size for each GPU. 
* **`variable_update`**: The method for managing variables: `parameter_server` -- GitLab From 54b8c7b8d2d44d862a7ecb297c835d60fca427ad Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Sat, 7 Oct 2017 22:49:33 -0700 Subject: [PATCH 167/909] Mirror SQLite zip file PiperOrigin-RevId: 171441141 --- tensorflow/workspace.bzl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index de0084613b..6151dc6241 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -313,7 +313,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.new_http_archive( name = "sqlite_archive", - urls = ["http://www.sqlite.org/2017/sqlite-amalgamation-3200000.zip"], + urls = [ + "http://mirror.bazel.build/www.sqlite.org/2017/sqlite-amalgamation-3200000.zip", + "http://www.sqlite.org/2017/sqlite-amalgamation-3200000.zip", + ], sha256 = "208780b3616f9de0aeb50822b7a8f5482f6515193859e91ed61637be6ad74fd4", strip_prefix = "sqlite-amalgamation-3200000", build_file = str(Label("//third_party:sqlite.BUILD")) -- GitLab From a1ab2a3b5263c535bfece377f1bdd77c7ade3240 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Sat, 7 Oct 2017 22:55:05 -0700 Subject: [PATCH 168/909] Pin TensorBoard 0.4 to tf-nightly (#13545) --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index a7a0706d0b..f476fe766f 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -36,7 +36,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.12.1', 'six >= 1.10.0', 'protobuf >= 3.3.0', - 'tensorflow-tensorboard >= 0.1.0, < 0.2.0', + 'tensorflow-tensorboard >= 0.4.0rc1, < 0.5.0', ] project_name = 'tensorflow' -- GitLab From 3431602bdf00038a87522b3afb08095d20e9a064 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Sat, 7 Oct 2017 23:11:20 -0700 Subject: [PATCH 169/909] Disable kmeans 
test in tsan. PiperOrigin-RevId: 171441927 --- tensorflow/contrib/factorization/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index 8a7825c614..c741815042 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -206,6 +206,7 @@ py_test( size = "medium", srcs = ["python/ops/kmeans_test.py"], srcs_version = "PY2AND3", + tags = ["notsan"], # b/67512932 deps = [ ":factorization_py", ":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO", -- GitLab From 074b66af3415cb3c60336b0a94f23aec04a715e3 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 8 Oct 2017 14:19:49 -0700 Subject: [PATCH 170/909] Change `dim` to `axis` for cosine_distance (#12801) * Change `dim` to `axis` for cosine_distance This fix changes `dim` to `axis` for cosine_distance so that the args are consistent with other methods in TensorFlow. The backward-compatibility has been maintained in the fix. This fix fixes 8205. Signed-off-by: Yong Tang * Change `dim` to `axis` for tf.losses.cosine_distance so that args are consistent with other TensorFlow methods. Signed-off-by: Yong Tang * Update API goldens and address review feedback This commit updates API goldens so that `//tensorflow/tools/api/tests:api_compatibility_test` could pass. Review feedback has also been addressed. 
Signed-off-by: Yong Tang --- .../contrib/losses/python/losses/loss_ops.py | 17 +++++++++----- tensorflow/python/ops/losses/losses_impl.py | 22 +++++++++++++------ .../tools/api/golden/tensorflow.losses.pbtxt | 2 +- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 1d2477b8b7..7c523ad492 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -28,6 +28,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops from tensorflow.python.util.deprecation import deprecated +from tensorflow.python.util.deprecation import deprecated_args __all__ = ["absolute_difference", "add_loss", @@ -623,8 +624,9 @@ def mean_pairwise_squared_error( @deprecated("2016-12-30", "Use tf.losses.cosine_distance instead.") +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") def cosine_distance( - predictions, labels=None, dim=None, weights=1.0, scope=None): + predictions, labels=None, axis=None, weights=1.0, scope=None, dim=None): """Adds a cosine-distance loss to the training procedure. Note that the function assumes that `predictions` and `labels` are already @@ -633,10 +635,11 @@ def cosine_distance( Args: predictions: An arbitrary matrix. labels: A `Tensor` whose shape matches 'predictions' - dim: The dimension along which the cosine distance is computed. + axis: The dimension along which the cosine distance is computed. weights: Coefficients for the loss a scalar, a tensor of shape [batch_size] or a tensor whose shape matches `predictions`. scope: The scope for the operations performed in computing the loss. + dim: The old (deprecated) name for `axis`. Returns: A scalar `Tensor` representing the loss value. 
@@ -645,8 +648,12 @@ def cosine_distance( ValueError: If `predictions` shape doesn't match `labels` shape, or `weights` is `None`. """ - if dim is None: - raise ValueError("`dim` cannot be None.") + if dim is not None: + if axis is not None: + raise ValueError("Cannot specify both 'axis' and 'dim'") + axis = dim + if axis is None and dim is None: + raise ValueError("You must specify 'axis'.") with ops.name_scope(scope, "cosine_distance_loss", [predictions, labels, weights]) as scope: predictions.get_shape().assert_is_compatible_with(labels.get_shape()) @@ -655,5 +662,5 @@ def cosine_distance( labels = math_ops.to_float(labels) radial_diffs = math_ops.multiply(predictions, labels) - losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,]) + losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[axis,]) return compute_weighted_loss(losses, weights, scope=scope) diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 752d260fba..55a18d28ca 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -27,6 +27,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops from tensorflow.python.ops import weights_broadcast_ops from tensorflow.python.ops.losses import util +from tensorflow.python.util.deprecation import deprecated_args class Reduction(object): @@ -230,10 +231,12 @@ def absolute_difference( losses, weights, scope, loss_collection, reduction=reduction) +@deprecated_args(None, "dim is deprecated, use axis instead", "dim") def cosine_distance( - labels, predictions, dim=None, weights=1.0, scope=None, + labels, predictions, axis=None, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES, - reduction=Reduction.SUM_BY_NONZERO_WEIGHTS): + reduction=Reduction.SUM_BY_NONZERO_WEIGHTS, + dim=None): """Adds a cosine-distance loss to the training procedure. 
Note that the function assumes that `predictions` and `labels` are already @@ -242,13 +245,14 @@ def cosine_distance( Args: labels: `Tensor` whose shape matches 'predictions' predictions: An arbitrary matrix. - dim: The dimension along which the cosine distance is computed. + axis: The dimension along which the cosine distance is computed. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `losses` dimension). scope: The scope for the operations performed in computing the loss. loss_collection: collection to which this loss will be added. reduction: Type of reduction to apply to loss. + dim: The old (deprecated) name for `axis`. Returns: Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same @@ -256,10 +260,14 @@ def cosine_distance( Raises: ValueError: If `predictions` shape doesn't match `labels` shape, or - `dim`, `labels`, `predictions` or `weights` is `None`. + `axis`, `labels`, `predictions` or `weights` is `None`. 
""" - if dim is None: - raise ValueError("`dim` cannot be None.") + if dim is not None: + if axis is not None: + raise ValueError("Cannot specify both 'axis' and 'dim'") + axis = dim + if axis is None and dim is None: + raise ValueError("You must specify 'axis'.") if labels is None: raise ValueError("labels must not be None.") if predictions is None: @@ -271,7 +279,7 @@ def cosine_distance( predictions.get_shape().assert_is_compatible_with(labels.get_shape()) radial_diffs = math_ops.multiply(predictions, labels) - losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(dim,), keep_dims=True) + losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(axis,), keep_dims=True) return compute_weighted_loss( losses, weights, scope, loss_collection, reduction=reduction) diff --git a/tensorflow/tools/api/golden/tensorflow.losses.pbtxt b/tensorflow/tools/api/golden/tensorflow.losses.pbtxt index 79443839b9..c1d190ae11 100644 --- a/tensorflow/tools/api/golden/tensorflow.losses.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.losses.pbtxt @@ -18,7 +18,7 @@ tf_module { } member_method { name: "cosine_distance" - argspec: "args=[\'labels\', \'predictions\', \'dim\', \'weights\', \'scope\', \'loss_collection\', \'reduction\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'None\', \'losses\', \'weighted_sum_by_nonzero_weights\'], " + argspec: "args=[\'labels\', \'predictions\', \'axis\', \'weights\', \'scope\', \'loss_collection\', \'reduction\', \'dim\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'None\', \'losses\', \'weighted_sum_by_nonzero_weights\', \'None\'], " } member_method { name: "get_losses" -- GitLab From cab4f6f615e259546a1c0719a32d019730b2ee71 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 8 Oct 2017 15:50:43 -0700 Subject: [PATCH 171/909] Improve invalid size vocab ValueError by appending the vocab file. This is helpful to identify erroneous vocab file for the common case of training programs with multiple vocabs. 
PiperOrigin-RevId: 171476954 --- .../python/kernel_tests/lookup_ops_test.py | 21 +++++++++++++++++++ tensorflow/python/ops/lookup_ops.py | 7 ++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index 1d92a08f5c..76c790a0a2 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -378,6 +378,27 @@ class IndexTableFromFile(test.TestCase): self.assertRaises( ValueError, lookup_ops.index_table_from_file, vocabulary_file=None) + def test_index_table_from_file_str_fails_with_zero_size_vocabulary(self): + vocabulary_file = self._createVocabFile("zero_vocab_str.txt") + self.assertRaisesRegexp( + ValueError, + "vocab_size must be greater than 0, got 0. " + "vocabulary_file: .*zero_vocab_str.txt", + lookup_ops.index_table_from_file, + vocabulary_file=vocabulary_file, + vocab_size=0) + + def test_index_table_from_file_tensor_fails_with_zero_size_vocabulary(self): + vocabulary_file = constant_op.constant( + self._createVocabFile("zero_vocab_tensor.txt")) + self.assertRaisesRegexp( + ValueError, + "vocab_size must be greater than 0, got 0. 
" + "vocabulary_file: .*zero_vocab_tensor.txt", + lookup_ops.index_table_from_file, + vocabulary_file=vocabulary_file, + vocab_size=0) + def test_index_table_from_file_with_vocab_size_too_small(self): vocabulary_file = self._createVocabFile("f2i_vocab6.txt") with self.test_session(): diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index bbfa38aa17..7f00344be2 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_lookup_ops @@ -927,7 +928,11 @@ def index_table_from_file(vocabulary_file=None, raise ValueError("num_oov_buckets must be greater or equal than 0, got %d." % num_oov_buckets) if vocab_size is not None and vocab_size < 1: - raise ValueError("vocab_size must be greater than 0, got %d." % vocab_size) + vocab_file_value = vocabulary_file + if isinstance(vocabulary_file, ops.Tensor): + vocab_file_value = tensor_util.constant_value(vocabulary_file) or "?" + raise ValueError("vocab_size must be greater than 0, got %d. " + "vocabulary_file: %s" % (vocab_size, vocab_file_value)) if (not key_dtype.is_integer) and (dtypes.string != key_dtype.base_dtype): raise TypeError("Only integer and string keys are supported.") -- GitLab From e0924e0577fe42b455be5fb881647fa64ea5b7c3 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Sun, 8 Oct 2017 16:18:24 -0700 Subject: [PATCH 172/909] [TFXLA] Don't discard status unless it is NotFound. 
PiperOrigin-RevId: 171477807 --- tensorflow/compiler/tf2xla/xla_compiler.cc | 19 +++- tensorflow/compiler/tf2xla/xla_compiler.h | 2 + .../compiler/tf2xla/xla_compiler_test.cc | 99 ++++++++++++++----- 3 files changed, 90 insertions(+), 30 deletions(-) diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 8521d4167a..1cd96fc4e2 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -92,7 +92,6 @@ XlaCompiler::XlaCompiler(XlaCompiler::Options options) } local_flib_def_.reset(new FunctionLibraryDefinition(OpRegistry::Global(), - FunctionDefLibrary{})); local_pflr_.reset(new ProcessFunctionLibraryRuntime( &device_mgr_, Env::Default(), options.graph_def_version, @@ -142,8 +141,17 @@ Status XlaCompiler::CompileFunction( } const FunctionBody* fbody; - if (!GetFunctionBody(function, local_flib_runtime_, &fbody).ok()) { - TF_RETURN_IF_ERROR(GetFunctionBody(function, flib_runtime_, &fbody)); + // The function may be in either the local_flib_runtime_ or flib_runtime_. + // Look up the function in local first and if it is not found then look up the + // function in flib_runtime_. + auto status = GetFunctionBody(function, local_flib_runtime_, &fbody); + if (!status.ok()) { + if (!errors::IsNotFound(status)) { + return status; + } + TF_RETURN_WITH_CONTEXT_IF_ERROR( + GetFunctionBody(function, flib_runtime_, &fbody), + "Local lookup failed with: ", status.error_message()); } TF_RETURN_IF_ERROR(CheckSignature(fbody->arg_types, args)); @@ -509,7 +517,7 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, result->requires_runtime_context = context->has_context_parameter(); // Tuple arguments and runtime context parameters are incompatible. 
- CHECK(!(options.use_tuple_arg && result->requires_runtime_context)); + TF_RET_CHECK(!(options.use_tuple_arg && result->requires_runtime_context)); VLOG(2) << "Outputs: total: " << context->retvals().size() << " nonconstant: " << num_nonconst_outputs; @@ -546,7 +554,8 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, i < context->retvals().size(); ++i) { const XlaExpression& retval = context->retvals()[i]; if (!retval.has_constant_value()) { - CHECK_LT(computation_output, num_computation_outputs); + TF_RET_CHECK(computation_output < num_computation_outputs) + << "Computation has more outputs than expected"; OutputDescription& output = result->outputs[i]; output.is_constant = false; TF_RETURN_IF_ERROR(XLAShapeToTensorShape( diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index 35159dbad4..addea74fc2 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -287,6 +287,8 @@ class XlaCompiler { FunctionLibraryRuntime* flib_runtime() const { return flib_runtime_; } private: + friend class XlaCompilerTest; + Options options_; // Status set to non-OK in the constructor if initialization fails. diff --git a/tensorflow/compiler/tf2xla/xla_compiler_test.cc b/tensorflow/compiler/tf2xla/xla_compiler_test.cc index 531725a623..9af557e23c 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler_test.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler_test.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/function_testlib.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/graph/graph.h" @@ -36,6 +37,37 @@ limitations under the License. 
#include "tensorflow/core/public/version.h" namespace tensorflow { + +class XlaCompilerTest : public ::testing::Test { + protected: + XlaCompilerTest() : cpu_device_type_(DEVICE_CPU_XLA_JIT) {} + + void SetUp() override { + client_ = xla::ClientLibrary::LocalClientOrDie(); + + XlaOpRegistry::RegisterCompilationKernels(); + + FunctionDefLibrary flib; + flib_def_.reset(new FunctionLibraryDefinition(OpRegistry::Global(), flib)); + } + + XlaCompiler::Options DefaultOptions() { + XlaCompiler::Options options; + options.device_type = &cpu_device_type_; + options.client = client_; + options.flib_def = flib_def_.get(); + return options; + } + + FunctionLibraryDefinition* LocalFlibDef(XlaCompiler* compiler) { + return compiler->local_flib_def_.get(); + } + + DeviceType cpu_device_type_; + xla::Client* client_; + std::unique_ptr flib_def_; +}; + namespace { // Helper class to test the ability to pass resources through to XLA @@ -125,31 +157,6 @@ REGISTER_XLA_OP(Name("DummyDuplicateOp").Device(DEVICE_CPU_XLA_JIT), REGISTER_XLA_OP(Name("DummyDuplicateOp").Device(DEVICE_GPU_XLA_JIT), DummyDuplicateOp); -class XlaCompilerTest : public ::testing::Test { - protected: - XlaCompilerTest() : cpu_device_type_(DEVICE_CPU_XLA_JIT) {} - - void SetUp() override { - client_ = xla::ClientLibrary::LocalClientOrDie(); - - XlaOpRegistry::RegisterCompilationKernels(); - - FunctionDefLibrary flib; - flib_def_.reset(new FunctionLibraryDefinition(OpRegistry::Global(), flib)); - } - - XlaCompiler::Options DefaultOptions() { - XlaCompiler::Options options; - options.device_type = &cpu_device_type_; - options.client = client_; - options.flib_def = flib_def_.get(); - return options; - } - - DeviceType cpu_device_type_; - xla::Client* client_; - std::unique_ptr flib_def_; -}; // Tests compilation and execution of an empty graph. 
TEST_F(XlaCompilerTest, EmptyReturnValues) { @@ -489,5 +496,47 @@ TEST_F(XlaCompilerTest, NewTensorArrayGradientsAreComputationOutputs) { EXPECT_EQ(1, result.resource_updates.size()); } +// Tests CompileFunction with undefined function fails. +TEST_F(XlaCompilerTest, UndefinedFunctionFails) { + XlaCompiler compiler(DefaultOptions()); + + std::unique_ptr graph(new Graph(OpRegistry::Global())); + XlaCompiler::CompilationResult result; + NameAttrList name_attr; + name_attr.set_name("Function_NotDefined_"); + Status status = + compiler.CompileFunction(XlaCompiler::CompileOptions(), name_attr, + /*args=*/{}, &result); + EXPECT_FALSE(status.ok()); + EXPECT_TRUE(StringPiece(status.error_message()).contains("is not defined.")) + << status.error_message(); +} + +// Tests CompileFunction with a local function lookup failing, fails with +// informative error about both lookups. +TEST_F(XlaCompilerTest, LocalFunctionWithWrongArgumentsFail) { + XlaCompiler compiler(DefaultOptions()); + + auto local_flib_def = LocalFlibDef(&compiler); + TF_ASSERT_OK(local_flib_def->AddFunctionDef(test::function::XTimesTwo())); + + std::unique_ptr graph(new Graph(OpRegistry::Global())); + XlaCompiler::CompilationResult result; + NameAttrList name_attr; + name_attr.set_name("XTimesTwo"); + Status status = + compiler.CompileFunction(XlaCompiler::CompileOptions(), name_attr, + /*args=*/{}, &result); + + ASSERT_FALSE(status.ok()); + // Flib lookup failure. + EXPECT_TRUE(StringPiece(status.error_message()).contains("is not defined.")) + << status.error_message(); + // Local flib lookup failure. + EXPECT_TRUE( + StringPiece(status.error_message()).contains("Attr T is not found")) + << status.error_message(); +} + } // namespace } // namespace tensorflow -- GitLab From 21da2369596e8d21aab6a562c747f4ea8a72480b Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Sun, 8 Oct 2017 20:47:49 -0700 Subject: [PATCH 173/909] Disable flaky cluster_function_library_runtime_test in opensource. 
PiperOrigin-RevId: 171489827 --- tensorflow/core/distributed_runtime/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD index 87c56b66a5..26e82fbb9a 100644 --- a/tensorflow/core/distributed_runtime/BUILD +++ b/tensorflow/core/distributed_runtime/BUILD @@ -121,7 +121,10 @@ tf_cc_test( name = "cluster_function_library_runtime_test", srcs = ["cluster_function_library_runtime_test.cc"], linkstatic = tf_kernel_tests_linkstatic(), - tags = ["nomac"], + tags = [ + "no_oss", + "nomac", + ], deps = [ ":worker_session", "//tensorflow/core:framework_internal", -- GitLab From 159dfb5e0b8e2b393ac6fa24a38c707bca154c1e Mon Sep 17 00:00:00 2001 From: Scott Mudge <19617165+scottmudge@users.noreply.github.com> Date: Mon, 9 Oct 2017 09:27:00 -0400 Subject: [PATCH 174/909] Fix for AVX2 support in Visual Studio (#13525) * Fixed AVX2 support for Visual Studio 2015. * Fixed for portability. --- .../CXX11/src/FixedPoint/PacketMathAVX2.h | 51 +++++++++++++++---- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h index 078be83e0d..c210b1712c 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h @@ -1,6 +1,35 @@ #ifndef THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_ #define THIRD_PARTY_EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_ +#ifdef _MSC_VER + +#include +#include +#include + +#endif + +inline int _mm256_extract_epi16_N0(const __m256i X) +{ + return _mm_extract_epi16(_mm256_extractf128_si256(X, 0 >> 3), 0 % 8); +} + +inline int _mm256_extract_epi16_N1(const __m256i X) +{ + return _mm_extract_epi16(_mm256_extractf128_si256(X, 1 >> 3), 1 % 8); +} + +inline int 
_mm256_extract_epi8_N0(const __m256i X) +{ + return _mm_extract_epi8(_mm256_extractf128_si256((X), 0 >> 4), 0 % 16); +} + +inline int _mm256_extract_epi8_N1(const __m256i X) +{ + return _mm_extract_epi8(_mm256_extractf128_si256((X), 1 >> 4), 1 % 16); +} + + namespace Eigen { namespace internal { @@ -271,15 +300,15 @@ EIGEN_STRONG_INLINE QInt32 pfirst(const Packet8q32i& a) { } template <> EIGEN_STRONG_INLINE QInt16 pfirst(const Packet16q16i& a) { - return _mm256_extract_epi16(a.val, 0); + return _mm256_extract_epi16_N0(a.val); } template <> EIGEN_STRONG_INLINE QUInt8 pfirst(const Packet32q8u& a) { - return static_cast(_mm256_extract_epi8(a.val, 0)); + return static_cast(_mm256_extract_epi8_N0(a.val)); } template <> EIGEN_STRONG_INLINE QInt8 pfirst(const Packet32q8i& a) { - return _mm256_extract_epi8(a.val, 0); + return _mm256_extract_epi8_N0(a.val); } // Initialize to constant value. @@ -391,7 +420,7 @@ EIGEN_STRONG_INLINE QInt16 predux_min(const Packet16q16i& a) { tmp = _mm256_min_epi16(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2))); tmp = _mm256_min_epi16(tmp, _mm256_shuffle_epi32(tmp, 1)); - return std::min(_mm256_extract_epi16(tmp, 0), _mm256_extract_epi16(tmp, 1)); + return std::min(_mm256_extract_epi16_N0(tmp), _mm256_extract_epi16_N1(tmp)); } template <> EIGEN_STRONG_INLINE QInt16 predux_max(const Packet16q16i& a) { @@ -399,7 +428,7 @@ EIGEN_STRONG_INLINE QInt16 predux_max(const Packet16q16i& a) { tmp = _mm256_max_epi16(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2))); tmp = _mm256_max_epi16(tmp, _mm256_shuffle_epi32(tmp, 1)); - return std::max(_mm256_extract_epi16(tmp, 0), _mm256_extract_epi16(tmp, 1)); + return std::max(_mm256_extract_epi16_N0(tmp), _mm256_extract_epi16_N1(tmp)); } template <> @@ -410,8 +439,8 @@ EIGEN_STRONG_INLINE QUInt8 predux_min(const Packet32q8u& a) { tmp = _mm256_min_epu8(tmp, _mm256_shuffle_epi32(tmp, 1)); tmp = _mm256_min_epu8(tmp, _mm256_shufflelo_epi16(tmp, _MM_SHUFFLE(1, 0, 3, 2))); - return 
std::min(static_cast(_mm256_extract_epi8(tmp, 0)), - static_cast(_mm256_extract_epi8(tmp, 1))); + return std::min(static_cast(_mm256_extract_epi8_N0(tmp)), + static_cast(_mm256_extract_epi8_N1(tmp))); } template <> EIGEN_STRONG_INLINE QUInt8 predux_max(const Packet32q8u& a) { @@ -421,8 +450,8 @@ EIGEN_STRONG_INLINE QUInt8 predux_max(const Packet32q8u& a) { tmp = _mm256_max_epu8(tmp, _mm256_shuffle_epi32(tmp, 1)); tmp = _mm256_max_epu8(tmp, _mm256_shufflelo_epi16(tmp, _MM_SHUFFLE(1, 0, 3, 2))); - return std::max(static_cast(_mm256_extract_epi8(tmp, 0)), - static_cast(_mm256_extract_epi8(tmp, 1))); + return std::max(static_cast(_mm256_extract_epi8_N0(tmp)), + static_cast(_mm256_extract_epi8_N1(tmp))); } template <> @@ -431,7 +460,7 @@ EIGEN_STRONG_INLINE QInt8 predux_min(const Packet32q8i& a) { tmp = _mm256_min_epi8(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2))); tmp = _mm256_min_epi8(tmp, _mm256_shuffle_epi32(tmp, 1)); tmp = _mm256_min_epi8(tmp, _mm256_shufflelo_epi16(tmp, _MM_SHUFFLE(1, 0, 3, 2))); - return std::min(_mm256_extract_epi8(tmp, 0), _mm256_extract_epi8(tmp, 1)); + return std::min(_mm256_extract_epi8_N0(tmp), _mm256_extract_epi8_N1(tmp)); } template <> EIGEN_STRONG_INLINE QInt8 predux_max(const Packet32q8i& a) { @@ -439,7 +468,7 @@ EIGEN_STRONG_INLINE QInt8 predux_max(const Packet32q8i& a) { tmp = _mm256_max_epi8(tmp, _mm256_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 3, 2))); tmp = _mm256_max_epi8(tmp, _mm256_shuffle_epi32(tmp, 1)); tmp = _mm256_max_epi8(tmp, _mm256_shufflelo_epi16(tmp, _MM_SHUFFLE(1, 0, 3, 2))); - return std::max(_mm256_extract_epi8(tmp, 0), _mm256_extract_epi8(tmp, 1)); + return std::max(_mm256_extract_epi8_N0(tmp), _mm256_extract_epi8_N1(tmp)); } // Vectorized scaling of Packet32q8i by float. -- GitLab From bb789adc1543684512aab1c83b13872b9ca27c63 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 9 Oct 2017 08:14:04 -0700 Subject: [PATCH 175/909] [TF:XLA] Rename HloOpcode::kLogicalX to kX PiperOrigin-RevId: 171536686 --- .../compiler/xla/service/cpu/ir_emitter.cc | 4 +-- .../xla/service/elemental_ir_emitter.cc | 12 +++---- .../compiler/xla/service/hlo_graph_dumper.cc | 6 ++-- .../compiler/xla/service/hlo_instruction.cc | 34 +++++++++---------- .../compiler/xla/service/hlo_matchers.h | 6 ++-- tensorflow/compiler/xla/service/hlo_opcode.cc | 12 +++---- tensorflow/compiler/xla/service/hlo_opcode.h | 6 ++-- .../xla/service/instruction_fusion.cc | 6 ++-- .../compiler/xla/service/shape_inference.cc | 6 ++-- .../compiler/xla/service/user_computation.cc | 6 ++-- 10 files changed, 49 insertions(+), 49 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 8132207699..c9c87f065b 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1511,11 +1511,11 @@ IrEmitter::ReductionGenerator IrEmitter::MatchReductionGenerator( : ir_builder->CreateFMul(lhs, rhs); }; - case HloOpcode::kLogicalAnd: + case HloOpcode::kAnd: return [](llvm::IRBuilder<>* ir_builder, llvm::Value* lhs, llvm::Value* rhs) { return ir_builder->CreateAnd(lhs, rhs); }; - case HloOpcode::kLogicalOr: + case HloOpcode::kOr: return [](llvm::IRBuilder<>* ir_builder, llvm::Value* lhs, llvm::Value* rhs) { return ir_builder->CreateOr(lhs, rhs); }; diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 7117ecb08b..12fb88f39c 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -126,7 +126,7 @@ StatusOr ElementalIrEmitter::EmitIntegerUnaryOp( } case HloOpcode::kNegate: return ir_builder_->CreateNeg(operand_value); - case HloOpcode::kLogicalNot: + case HloOpcode::kNot: // It is not 
sufficient to just call CreateNot() here because a PRED is // represented as an i8 and the truth value is stored only in the bottom // bit. @@ -557,9 +557,9 @@ StatusOr ElementalIrEmitter::EmitIntegerBinaryOp( is_signed ? llvm::ICmpInst::ICMP_SGE : llvm::ICmpInst::ICMP_UGE, lhs_value, rhs_value), lhs_value, rhs_value); - case HloOpcode::kLogicalAnd: + case HloOpcode::kAnd: return ir_builder_->CreateAnd(lhs_value, rhs_value); - case HloOpcode::kLogicalOr: + case HloOpcode::kOr: return ir_builder_->CreateOr(lhs_value, rhs_value); default: return Unimplemented("binary integer op '%s'", @@ -799,7 +799,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( case HloOpcode::kSign: case HloOpcode::kSin: case HloOpcode::kTanh: - case HloOpcode::kLogicalNot: + case HloOpcode::kNot: return [this, hlo, &operand_to_generator]( const IrArray::Index& index) -> StatusOr { TF_ASSIGN_OR_RETURN(llvm::Value * operand_value, @@ -821,8 +821,8 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( case HloOpcode::kPower: case HloOpcode::kRemainder: case HloOpcode::kSubtract: - case HloOpcode::kLogicalAnd: - case HloOpcode::kLogicalOr: + case HloOpcode::kAnd: + case HloOpcode::kOr: return [this, hlo, &operand_to_generator]( const IrArray::Index& index) -> StatusOr { const HloInstruction* lhs = hlo->operand(0); diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 9b4a2f1048..20fc85c0e9 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -777,9 +777,9 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) { case HloOpcode::kIsFinite: case HloOpcode::kLe: case HloOpcode::kLog: - case HloOpcode::kLogicalAnd: - case HloOpcode::kLogicalNot: - case HloOpcode::kLogicalOr: + case HloOpcode::kAnd: + case HloOpcode::kNot: + case HloOpcode::kOr: case HloOpcode::kLt: case HloOpcode::kMaximum: case 
HloOpcode::kMinimum: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 7419ab8704..77a748163e 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -126,7 +126,7 @@ HloInstruction::CreateGetTupleElement(const Shape& shape, case HloOpcode::kFloor: case HloOpcode::kIsFinite: case HloOpcode::kLog: - case HloOpcode::kLogicalNot: + case HloOpcode::kNot: case HloOpcode::kNegate: case HloOpcode::kSign: case HloOpcode::kSin: @@ -161,8 +161,8 @@ HloInstruction::CreateGetTupleElement(const Shape& shape, case (HloOpcode::kPower): case (HloOpcode::kRemainder): case (HloOpcode::kSubtract): - case (HloOpcode::kLogicalAnd): - case (HloOpcode::kLogicalOr): + case (HloOpcode::kAnd): + case (HloOpcode::kOr): break; default: LOG(FATAL) << "Invalid binary instruction opcode " @@ -879,7 +879,7 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kIsFinite: case HloOpcode::kFloor: case HloOpcode::kLog: - case HloOpcode::kLogicalNot: + case HloOpcode::kNot: case HloOpcode::kNegate: case HloOpcode::kSign: case HloOpcode::kSin: @@ -903,8 +903,8 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( case HloOpcode::kMinimum: case HloOpcode::kPower: case HloOpcode::kRemainder: - case HloOpcode::kLogicalAnd: - case HloOpcode::kLogicalOr: + case HloOpcode::kAnd: + case HloOpcode::kOr: CHECK_EQ(new_operands.size(), 2); return CreateBinary(shape, opcode_, new_operands[0], new_operands[1]); // Ternary ops. 
@@ -1258,9 +1258,9 @@ bool HloInstruction::IdenticalSlowPath( case HloOpcode::kIsFinite: case HloOpcode::kLe: case HloOpcode::kLog: - case HloOpcode::kLogicalAnd: - case HloOpcode::kLogicalNot: - case HloOpcode::kLogicalOr: + case HloOpcode::kAnd: + case HloOpcode::kNot: + case HloOpcode::kOr: case HloOpcode::kLt: case HloOpcode::kMaximum: case HloOpcode::kMinimum: @@ -1957,9 +1957,9 @@ Status HloInstruction::Visit(DfsHloVisitor* visitor) { return visitor->HandleMaximum(this); case HloOpcode::kMinimum: return visitor->HandleMinimum(this); - case HloOpcode::kLogicalAnd: + case HloOpcode::kAnd: return visitor->HandleLogicalAnd(this, operands_[0], operands_[1]); - case HloOpcode::kLogicalOr: + case HloOpcode::kOr: return visitor->HandleLogicalOr(this, operands_[0], operands_[1]); case HloOpcode::kConcatenate: return visitor->HandleConcatenate(this, operands_); @@ -2016,7 +2016,7 @@ Status HloInstruction::Visit(DfsHloVisitor* visitor) { return visitor->HandleSin(this, operands_[0]); case HloOpcode::kIsFinite: return visitor->HandleIsFinite(this, operands_[0]); - case HloOpcode::kLogicalNot: + case HloOpcode::kNot: return visitor->HandleLogicalNot(this, operands_[0]); case HloOpcode::kBitcast: return visitor->HandleBitcast(this); @@ -2319,8 +2319,8 @@ bool HloInstruction::IsElementwiseBinary() const { case HloOpcode::kPower: case HloOpcode::kRemainder: case HloOpcode::kSubtract: - case HloOpcode::kLogicalAnd: - case HloOpcode::kLogicalOr: + case HloOpcode::kAnd: + case HloOpcode::kOr: return true; default: return false; @@ -2344,7 +2344,7 @@ bool HloInstruction::IsElementwise() const { case HloOpcode::kFloor: case HloOpcode::kIsFinite: case HloOpcode::kLog: - case HloOpcode::kLogicalNot: + case HloOpcode::kNot: case HloOpcode::kNegate: case HloOpcode::kReducePrecision: case HloOpcode::kSign: @@ -2368,8 +2368,8 @@ bool HloInstruction::IsElementwise() const { case HloOpcode::kPower: case HloOpcode::kRemainder: case HloOpcode::kSubtract: - case HloOpcode::kLogicalAnd: - 
case HloOpcode::kLogicalOr: + case HloOpcode::kAnd: + case HloOpcode::kOr: return true; // Ternary elementwise operations. diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index b1b3dd61a6..ab5e5463fa 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -79,9 +79,9 @@ HLO_MATCHER(Infeed); HLO_MATCHER(IsFinite); HLO_MATCHER(Le); HLO_MATCHER(Log); -HLO_MATCHER(LogicalAnd); -HLO_MATCHER(LogicalNot); -HLO_MATCHER(LogicalOr); +HLO_MATCHER(And); +HLO_MATCHER(Not); +HLO_MATCHER(Or); HLO_MATCHER(Lt); HLO_MATCHER(Map); HLO_MATCHER(Maximum); diff --git a/tensorflow/compiler/xla/service/hlo_opcode.cc b/tensorflow/compiler/xla/service/hlo_opcode.cc index 83fe6ef6c9..d3d78f4a99 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.cc +++ b/tensorflow/compiler/xla/service/hlo_opcode.cc @@ -95,12 +95,12 @@ string HloOpcodeString(HloOpcode opcode) { return "less-than-or-equal-to"; case HloOpcode::kLog: return "log"; - case HloOpcode::kLogicalAnd: - return "logical-and"; - case HloOpcode::kLogicalOr: - return "logical-or"; - case HloOpcode::kLogicalNot: - return "logical-not"; + case HloOpcode::kAnd: + return "and"; + case HloOpcode::kOr: + return "or"; + case HloOpcode::kNot: + return "not"; case HloOpcode::kLt: return "less-than"; case HloOpcode::kMap: diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h index 7b23249640..9c26f360fb 100644 --- a/tensorflow/compiler/xla/service/hlo_opcode.h +++ b/tensorflow/compiler/xla/service/hlo_opcode.h @@ -62,9 +62,9 @@ enum class HloOpcode { kIsFinite, kLe, kLog, - kLogicalAnd, - kLogicalNot, - kLogicalOr, + kAnd, + kNot, + kOr, kLt, kMap, kMaximum, diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 7a27381642..e08e4e4d69 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ 
b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -53,9 +53,9 @@ namespace xla { case HloOpcode::kInfeed: case HloOpcode::kIsFinite: case HloOpcode::kLe: - case HloOpcode::kLogicalAnd: - case HloOpcode::kLogicalNot: - case HloOpcode::kLogicalOr: + case HloOpcode::kAnd: + case HloOpcode::kNot: + case HloOpcode::kOr: case HloOpcode::kLt: case HloOpcode::kMaximum: case HloOpcode::kMinimum: diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 29221d2d29..06a68c81e4 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -57,7 +57,7 @@ UnaryOperation OpcodeToUnaryOperation(HloOpcode opcode) { return UNOP_IS_FINITE; case HloOpcode::kLog: return UNOP_LOG; - case HloOpcode::kLogicalNot: + case HloOpcode::kNot: return UNOP_LOGICAL_NOT; case HloOpcode::kNegate: return UNOP_NEGATE; @@ -113,9 +113,9 @@ BinaryOperation OpcodeToBinaryOperation(HloOpcode opcode) { return BINOP_POW; case HloOpcode::kRemainder: return BINOP_REM; - case HloOpcode::kLogicalOr: + case HloOpcode::kOr: return BINOP_LOGICAL_OR; - case HloOpcode::kLogicalAnd: + case HloOpcode::kAnd: return BINOP_LOGICAL_AND; default: LOG(FATAL) << "unhandled opcode " << opcode; diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index 3f62501bb5..05f5476b88 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -59,7 +59,7 @@ HloOpcode UnaryOperationToHloOpcode(UnaryOperation unop) { case UNOP_LOG: return HloOpcode::kLog; case UNOP_LOGICAL_NOT: - return HloOpcode::kLogicalNot; + return HloOpcode::kNot; case UNOP_NEGATE: return HloOpcode::kNegate; case UNOP_ROUND_NEAREST_AFZ: @@ -112,9 +112,9 @@ HloOpcode BinaryOperationToHloOpcode(BinaryOperation binop) { case BINOP_REM: return HloOpcode::kRemainder; case BINOP_LOGICAL_OR: - return 
HloOpcode::kLogicalOr; + return HloOpcode::kOr; case BINOP_LOGICAL_AND: - return HloOpcode::kLogicalAnd; + return HloOpcode::kAnd; default: LOG(FATAL) << "unhandled operation " << binop; } -- GitLab From edfb9bb100f9814bf1bbcff2e8a32f12f049bfcc Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 9 Oct 2017 08:56:08 -0700 Subject: [PATCH 176/909] Correct documentation typo. Fixes #13576 PiperOrigin-RevId: 171540987 --- tensorflow/python/ops/nn_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index babe2efba0..8876591e53 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1844,7 +1844,7 @@ def sparse_softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable= Raises: ValueError: If logits are scalars (need to have rank >= 1) or if the rank - of the labels is not equal to the rank of the labels minus one. + of the labels is not equal to the rank of the logits minus one. """ _ensure_xent_args("sparse_softmax_cross_entropy_with_logits", _sentinel, labels, logits) -- GitLab From b0b92fd60b44808925fa554190b80d09ced67677 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 9 Oct 2017 09:06:45 -0700 Subject: [PATCH 177/909] [tf.data] Add new custom transformation: `tf.contrib.data.scan()`. `scan()` is similar to `Dataset.map()`, with the addition of a generic piece of state that is accumulated across the elements of the input, and that may be used in the computation of the output elements. This change also updates `rejection_resample()` to use `scan()` rather than a local `tf.ResourceVariable` for accumulating the number of times each class has been encountered. 
PiperOrigin-RevId: 171542274 --- .../contrib/data/python/kernel_tests/BUILD | 27 +++ .../data/python/kernel_tests/resample_test.py | 22 -- .../kernel_tests/scan_dataset_op_test.py | 128 +++++++++++ tensorflow/contrib/data/python/ops/BUILD | 1 + .../contrib/data/python/ops/resampling.py | 49 ++-- .../contrib/data/python/ops/scan_ops.py | 182 +++++++++++++++ tensorflow/core/kernels/BUILD | 15 ++ tensorflow/core/kernels/scan_dataset_op.cc | 213 ++++++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 15 ++ 9 files changed, 603 insertions(+), 49 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py create mode 100644 tensorflow/contrib/data/python/ops/scan_ops.py create mode 100644 tensorflow/core/kernels/scan_dataset_op.cc diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index c34c9dad9b..faf051203c 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -237,6 +237,33 @@ py_test( ], ) +py_test( + name = "scan_dataset_op_test", + size = "small", + srcs = ["scan_dataset_op_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/ops:dataset_ops", + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:data_flow_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:functional_ops", + "//tensorflow/python:io_ops", + "//tensorflow/python:lookup_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:random_ops", + "//tensorflow/python:script_ops", + "//tensorflow/python:string_ops", + "//tensorflow/python:util", + "//tensorflow/python:variable_scope", + "//third_party/py/numpy", + ], +) + py_test( name = "range_dataset_op_test", size = "small", diff --git 
a/tensorflow/contrib/data/python/kernel_tests/resample_test.py b/tensorflow/contrib/data/python/kernel_tests/resample_test.py index a19c917075..0ac8d7359f 100644 --- a/tensorflow/contrib/data/python/kernel_tests/resample_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/resample_test.py @@ -22,11 +22,8 @@ import numpy as np from tensorflow.contrib.data.python.ops import dataset_ops from tensorflow.contrib.data.python.ops import resampling from tensorflow.python.framework import errors -from tensorflow.python.framework import ops from tensorflow.python.ops import string_ops -from tensorflow.python.ops import variables from tensorflow.python.platform import test -from tensorflow.python.training import device_setter from tensorflow.python.util import compat @@ -51,10 +48,8 @@ class ResampleTest(test.TestCase): seed=27)).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() - variable_init_op = variables.local_variables_initializer() with self.test_session() as sess: - sess.run(variable_init_op) sess.run(init_op) returned = [] with self.assertRaises(errors.OutOfRangeError): @@ -75,23 +70,6 @@ class ResampleTest(test.TestCase): returned_dist = class_counts / total_returned self.assertAllClose(target_dist, returned_dist, atol=1e-2) - def testVariableDevicePlacement(self): - classes = np.random.randint(5, size=(20000,)) # Uniformly sampled - target_dist = [0.9, 0.05, 0.05, 0.0, 0.0] - with ops.device( - device_setter.replica_device_setter(ps_tasks=1, ps_device="/cpu:0")): - _ = (dataset_ops.Dataset.from_tensor_slices(classes).shuffle( - 200, seed=21).map(lambda c: (c, string_ops.as_string(c))).apply( - resampling.rejection_resample( - target_dist=target_dist, - initial_dist=None, - class_func=lambda c, _: c, - seed=27))) - - self.assertEqual(1, len(variables.local_variables())) - self.assertEqual(b"", - compat.as_bytes(variables.local_variables()[0].device)) - if __name__ == "__main__": test.main() diff --git 
a/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py new file mode 100644 index 0000000000..5338ec56bf --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/scan_dataset_op_test.py @@ -0,0 +1,128 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +import numpy as np + +from tensorflow.contrib.data.python.ops import scan_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class ScanDatasetTest(test.TestCase): + + def _count(self, start, step): + return dataset_ops.Dataset.from_tensors(0).repeat(None).apply( + scan_ops.scan(start, lambda state, _: (state + step, state))) + + def testCount(self): + start = array_ops.placeholder(dtypes.int32, shape=[]) + step = array_ops.placeholder(dtypes.int32, shape=[]) + take = array_ops.placeholder(dtypes.int64, shape=[]) + iterator = self._count(start, 
step).take(take).make_initializable_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + + for start_val, step_val, take_val in [(0, 1, 10), (0, 1, 0), (10, 1, 10), + (10, 2, 10), (10, -1, 10), + (10, -2, 10)]: + sess.run(iterator.initializer, + feed_dict={start: start_val, step: step_val, take: take_val}) + for expected, _ in zip( + itertools.count(start_val, step_val), range(take_val)): + self.assertEqual(expected, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testFibonacci(self): + iterator = dataset_ops.Dataset.from_tensors(1).repeat(None).apply( + scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1])) + ).make_one_shot_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + self.assertEqual(1, sess.run(next_element)) + self.assertEqual(1, sess.run(next_element)) + self.assertEqual(2, sess.run(next_element)) + self.assertEqual(3, sess.run(next_element)) + self.assertEqual(5, sess.run(next_element)) + self.assertEqual(8, sess.run(next_element)) + + def testChangingStateShape(self): + # Test the fixed-point shape invariant calculations: start with + # initial values with known shapes, and use a scan function that + # changes the size of the state on each element. + def _scan_fn(state, input_value): + # Statically known rank, but dynamic length. + ret_longer_vector = array_ops.concat([state[0], state[0]], 0) + # Statically unknown rank. 
+ ret_larger_rank = array_ops.expand_dims(state[1], 0) + return (ret_longer_vector, ret_larger_rank), (state, input_value) + + dataset = dataset_ops.Dataset.from_tensors(0).repeat(5).apply( + scan_ops.scan(([0], 1), _scan_fn)) + self.assertEqual([None], dataset.output_shapes[0][0].as_list()) + self.assertIs(None, dataset.output_shapes[0][1].ndims) + self.assertEqual([], dataset.output_shapes[1].as_list()) + + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + for i in range(5): + (longer_vector_val, larger_rank_val), _ = sess.run(next_element) + self.assertAllEqual([0] * (2**i), longer_vector_val) + self.assertAllEqual(np.array(1, ndmin=i), larger_rank_val) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testIncorrectStateType(self): + + def _scan_fn(state, _): + return constant_op.constant(1, dtype=dtypes.int64), state + + dataset = dataset_ops.Dataset.range(10) + with self.assertRaisesRegexp( + TypeError, + "The element types for the new state must match the initial state."): + dataset.apply( + scan_ops.scan(constant_op.constant(1, dtype=dtypes.int32), _scan_fn)) + + def testIncorrectReturnType(self): + + def _scan_fn(unused_state, unused_input_value): + return constant_op.constant(1, dtype=dtypes.int64) + + dataset = dataset_ops.Dataset.range(10) + with self.assertRaisesRegexp( + TypeError, + "The scan function must return a pair comprising the new state and the " + "output value."): + dataset.apply( + scan_ops.scan(constant_op.constant(1, dtype=dtypes.int32), _scan_fn)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 690cccbea3..2a9b41d6df 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -49,6 +49,7 @@ py_library( "error_ops.py", "grouping.py", "resampling.py", + "scan_ops.py", "sloppy_ops.py", ], 
srcs_version = "PY2AND3", diff --git a/tensorflow/contrib/data/python/ops/resampling.py b/tensorflow/contrib/data/python/ops/resampling.py index ee46f3e852..56f526a330 100644 --- a/tensorflow/contrib/data/python/ops/resampling.py +++ b/tensorflow/contrib/data/python/ops/resampling.py @@ -20,6 +20,7 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import scan_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -28,7 +29,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import resource_variable_ops def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): @@ -68,26 +68,20 @@ def rejection_resample(class_func, target_dist, initial_dist=None, seed=None): num_classes = (target_dist_t.shape[0].value or array_ops.shape(target_dist_t)[0]) smoothing_constant = 10 - # Disable device functions and colocation constraints so that the variable - # will be placed with the eventual DT_VARIANT dataset tensor. 
- with ops.colocate_with(None, ignore_existing=True): - num_examples_per_class_seen = resource_variable_ops.ResourceVariable( - initial_value=array_ops.fill([num_classes], - np.int64(smoothing_constant)), - trainable=False, - collections=[ops.GraphKeys.LOCAL_VARIABLES], - name="local_class_count", - dtype=dtypes.int64) - - def update_estimate_and_tile(c): - return array_ops.tile( - array_ops.expand_dims( - _estimate_data_distribution(c, num_examples_per_class_seen), 0), - [dist_estimation_batch_size, 1]) + initial_examples_per_class_seen = array_ops.fill( + [num_classes], np.int64(smoothing_constant)) + + def update_estimate_and_tile(num_examples_per_class_seen, c): + updated_examples_per_class_seen, dist = _estimate_data_distribution( + c, num_examples_per_class_seen) + tiled_dist = array_ops.tile( + array_ops.expand_dims(dist, 0), [dist_estimation_batch_size, 1]) + return updated_examples_per_class_seen, tiled_dist initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size) - .map(update_estimate_and_tile).apply(batching - .unbatch())) + .apply(scan_ops.scan(initial_examples_per_class_seen, + update_estimate_and_tile)) + .apply(batching.unbatch())) acceptance_dist_ds = initial_dist_ds.map( lambda initial: _calculate_acceptance_probs(initial, target_dist_t)) @@ -174,20 +168,21 @@ def _estimate_data_distribution(c, num_examples_per_class_seen): Args: c: The class labels. Type `int32`, shape `[batch_size]`. - num_examples_per_class_seen: A `ResourceVariable` containing counts. - Type `int64`, shape `[num_classes]`. + num_examples_per_class_seen: Type `int64`, shape `[num_classes]`, + containing counts. Returns: + num_examples_per_class_seen: Updated counts. Type `int64`, shape + `[num_classes]`. dist: The updated distribution. Type `float32`, shape `[num_classes]`. """ num_classes = num_examples_per_class_seen.get_shape()[0].value - # Update the class-count based on what labels are seen in - # batch.
But do this asynchronously to avoid performing a - # cross-device round-trip. Just use the cached value. - num_examples_per_class_seen = num_examples_per_class_seen.assign_add( - math_ops.reduce_sum( + # Update the class-count based on what labels are seen in batch. + num_examples_per_class_seen = math_ops.add( + num_examples_per_class_seen, math_ops.reduce_sum( array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0)) init_prob_estimate = math_ops.truediv( num_examples_per_class_seen, math_ops.reduce_sum(num_examples_per_class_seen)) - return math_ops.cast(init_prob_estimate, dtypes.float32) + dist = math_ops.cast(init_prob_estimate, dtypes.float32) + return num_examples_per_class_seen, dist diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py new file mode 100644 index 0000000000..5acaed48a3 --- /dev/null +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -0,0 +1,182 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Scan dataset transformation.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import nest +from tensorflow.python.framework import function +from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops + + +class _ScanDataset(dataset_ops.Dataset): + """A dataset that scans a function across its input.""" + + def __init__(self, input_dataset, initial_state, scan_func): + """See `scan()` for details.""" + super(_ScanDataset, self).__init__() + self._input_dataset = input_dataset + + with ops.name_scope("initial_state"): + self._initial_state = nest.pack_sequence_as(initial_state, [ + ops.convert_to_tensor(t, name="component_%d" % i) + for i, t in enumerate(nest.flatten(initial_state)) + ]) + + # Compute initial values for the state shapes and types based on + # the initial state. These will be refined by running + # `tf_scan_func` one or more times below. + self._state_shapes = nest.pack_sequence_as( + self._initial_state, + [t.shape for t in nest.flatten(self._initial_state)]) + self._state_types = nest.pack_sequence_as( + self._initial_state, + [t.dtype for t in nest.flatten(self._initial_state)]) + + # Will be populated by calling `tf_scan_func`. + self._output_shapes = None + self._output_types = None + + # Iteratively rerun the scan function until reaching a fixed point on + # `self._state_shapes`.
+ need_to_rerun = True + while need_to_rerun: + + flat_state_shapes = nest.flatten(self._state_shapes) + flat_state_types = nest.flatten(self._state_types) + + # Create a list in which `tf_scan_func` will store the new shapes. + flat_new_state_shapes = [] + + @function.Defun( + *(flat_state_types + nest.flatten(input_dataset.output_types))) + def tf_scan_func(*args): + """A wrapper for Defun that facilitates shape inference.""" + # Pass in shape information from the state and input_dataset. + for arg, shape in zip( + args, + flat_state_shapes + nest.flatten(input_dataset.output_shapes)): + arg.set_shape(shape) + + pivot = len(flat_state_shapes) + old_state = nest.pack_sequence_as(self._initial_state, args[:pivot]) + input_value = nest.pack_sequence_as(input_dataset.output_types, + args[pivot:]) + + ret = scan_func(old_state, input_value) + if not isinstance(ret, collections.Sequence) or len(ret) != 2: + raise TypeError("The scan function must return a pair comprising the " + "new state and the output value.") + new_state, output_value = ret + + flat_new_state = [ + ops.convert_to_tensor(t) for t in nest.flatten(new_state) + ] + flat_output_value = [ + ops.convert_to_tensor(t) for t in nest.flatten(output_value) + ] + + # Extract shape information from the returned values. + flat_new_state_shapes.extend([t.shape for t in flat_new_state]) + self._output_shapes = nest.pack_sequence_as( + output_value, [t.shape for t in flat_output_value]) + + # Extract and validate type information from the returned values. + for t, dtype in zip(flat_new_state, flat_state_types): + if t.dtype != dtype: + raise TypeError( + "The element types for the new state must match the initial " + "state. Expected %s; got %s."
% + (self._state_types, nest.pack_sequence_as( + self._state_types, [t.dtype for t in flat_new_state]))) + self._output_types = nest.pack_sequence_as( + output_value, [t.dtype for t in flat_output_value]) + + return flat_new_state + flat_output_value + + # Use the private method that will execute `tf_scan_func` but delay + # adding it to the graph in case we need to rerun the function. + tf_scan_func._create_definition_if_needed() # pylint: disable=protected-access + + weakened_state_shapes = [ + original.most_specific_compatible_shape(new) + for original, new in zip(flat_state_shapes, flat_new_state_shapes) + ] + + need_to_rerun = False + for original_shape, weakened_shape in zip(flat_state_shapes, + weakened_state_shapes): + if original_shape.ndims is not None and ( + weakened_shape.ndims is None or + original_shape.as_list() != weakened_shape.as_list()): + need_to_rerun = True + break + + if need_to_rerun: + # NOTE(mrry): `self._output_shapes` will be overwritten when we rerun + # `tf_scan_func`. + self._state_shapes = nest.pack_sequence_as(self._state_shapes, + weakened_state_shapes) + + self._scan_func = tf_scan_func + + def _as_variant_tensor(self): + input_t = self._input_dataset._as_variant_tensor() # pylint: disable=protected-access + return gen_dataset_ops.scan_dataset( + input_t, + nest.flatten(self._initial_state), + self._scan_func.captured_inputs, + f=self._scan_func, + output_types=nest.flatten(self.output_types), + output_shapes=nest.flatten(self.output_shapes)) + + @property + def output_shapes(self): + return self._output_shapes + + @property + def output_types(self): + return self._output_types + + +def scan(initial_state, scan_func): + """A transformation that scans a function across an input dataset. + + This transformation is a stateful relative of @{tf.data.Dataset.map}. + In addition to mapping `scan_func` across the elements of the input dataset, + `scan()` accumulates one or more state tensors, whose initial values are + `initial_state`. 
+ + Args: + initial_state: A nested structure of tensors, representing the initial state + of the accumulator. + scan_func: A function that maps `(old_state, input_element)` to + `(new_state, output_element)`. It must take two arguments and return a + pair of nested structures of tensors. The `new_state` must match the + structure of `initial_state`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.contrib.data.Dataset.apply}. + """ + def _apply_fn(dataset): + return _ScanDataset(dataset, initial_state, scan_func) + + return _apply_fn diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index bdc6faefbc..a3aa905415 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5790,6 +5790,20 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "scan_dataset_op", + srcs = ["scan_dataset_op.cc"], + deps = [ + ":captured_function", + ":dataset", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + tf_kernel_library( name = "flat_map_dataset_op", srcs = ["flat_map_dataset_op.cc"], @@ -6061,6 +6075,7 @@ tf_kernel_library( ":range_dataset_op", ":reader_dataset_ops", ":repeat_dataset_op", + ":scan_dataset_op", ":shuffle_dataset_op", ":skip_dataset_op", ":sloppy_interleave_dataset_op", diff --git a/tensorflow/core/kernels/scan_dataset_op.cc b/tensorflow/core/kernels/scan_dataset_op.cc new file mode 100644 index 0000000000..76c219f1ae --- /dev/null +++ b/tensorflow/core/kernels/scan_dataset_op.cc @@ -0,0 +1,213 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/captured_function.h" +#include "tensorflow/core/kernels/dataset.h" +#include "tensorflow/core/lib/random/random.h" + +namespace tensorflow { + +namespace { + +// See documentation in ../ops/dataset_ops.cc for a high-level +// description of the following op. + +class ScanDatasetOp : public UnaryDatasetOpKernel { + public: + explicit ScanDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx), + graph_def_version_(ctx->graph_def_version()) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("Tstate", &state_types_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_)); + } + + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + OpInputList initial_state_inputs; + OP_REQUIRES_OK(ctx, + ctx->input_list("initial_state", &initial_state_inputs)); + std::vector initial_state; + initial_state.reserve(initial_state_inputs.size()); + for (const Tensor& t : initial_state_inputs) { + initial_state.push_back(t); + } + + OpInputList inputs; + OP_REQUIRES_OK(ctx, ctx->input_list("other_arguments", &inputs)); + std::vector other_arguments; + other_arguments.reserve(inputs.size()); + for (const Tensor& t : inputs) { + 
other_arguments.push_back(t); + } + + std::unique_ptr captured_func; + OP_REQUIRES_OK(ctx, CapturedFunction::Create(ctx, func_, graph_def_version_, + std::move(other_arguments), + &captured_func)); + + *output = + new Dataset(input, std::move(initial_state), std::move(captured_func), + state_types_, output_types_, output_shapes_); + } + + private: + class Dataset : public DatasetBase { + public: + Dataset(const DatasetBase* input, std::vector initial_state, + std::unique_ptr captured_func, + const DataTypeVector& state_types, + const DataTypeVector& output_types, + const std::vector& output_shapes) + : input_(input), + initial_state_(std::move(initial_state)), + captured_func_(std::move(captured_func)), + state_types_(state_types), + output_types_(output_types), + output_shapes_(output_shapes) { + input_->Ref(); + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::Scan")})); + } + + const DataTypeVector& output_dtypes() const override { + return output_types_; + } + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() override { return "ScanDatasetOp::Dataset"; } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + input_impl_(params.dataset->input_->MakeIterator(params.prefix)), + state_(params.dataset->initial_state_) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + + std::vector next_element; + TF_RETURN_IF_ERROR( + input_impl_->GetNext(ctx, &next_element, end_of_sequence)); + if (*end_of_sequence) { + return Status::OK(); + } + + std::vector args; + args.reserve(state_.size() + next_element.size()); + std::copy(state_.begin(), state_.end(), std::back_inserter(args)); + 
std::copy(next_element.begin(), next_element.end(), + std::back_inserter(args)); + + FunctionLibraryRuntime::Options opts; + opts.step_id = CapturedFunction::generate_step_id(); + ScopedStepContainer step_container( + opts.step_id, [this, ctx](const string& name) { + dataset() + ->captured_func_->resource_manager() + ->Cleanup(name) + .IgnoreError(); + }); + opts.step_container = &step_container; + opts.runner = ctx->runner(); + std::vector state_and_output; + state_and_output.reserve(dataset()->state_types_.size() + + output_dtypes().size()); + Status s = + dataset()->captured_func_->Run(opts, args, &state_and_output); + if (s.ok()) { + state_.clear(); + size_t i = 0; + for (; i < dataset()->state_types_.size(); ++i) { + if (state_and_output[i].dtype() != dataset()->state_types_[i]) { + return errors::InvalidArgument( + "Got wrong type for scan_func return value ", i, + " (expected ", DataTypeString(dataset()->state_types_[i]), + ", got ", DataTypeString(state_and_output[i].dtype()), ")."); + } + state_.push_back(std::move(state_and_output[i])); + } + for (; i < state_and_output.size(); ++i) { + const size_t output_index = i - dataset()->state_types_.size(); + if (state_and_output[i].dtype() != output_dtypes()[output_index]) { + return errors::InvalidArgument( + "Got wrong type for scan_func return value ", i, + " (expected ", + DataTypeString(dataset()->state_types_[output_index]), + ", got ", DataTypeString(state_and_output[i].dtype()), ")."); + } + if (!output_shapes()[output_index].IsCompatibleWith( + state_and_output[i].shape())) { + return errors::InvalidArgument( + "Got wrong shape for scan_func return value ", i, + " (expected ", output_shapes()[output_index].DebugString(), + ", got ", state_and_output[i].shape().DebugString(), ")."); + } + + out_tensors->push_back(std::move(state_and_output[i])); + } + } else if (errors::IsOutOfRange(s)) { + // `f` may deliberately raise `errors::OutOfRange` to indicate + // that we should terminate the iteration early. 
+ *end_of_sequence = true; + return Status::OK(); + } + return s; + } + + private: + mutex mu_; + const std::unique_ptr input_impl_ GUARDED_BY(mu_); + std::vector state_ GUARDED_BY(mu_); + }; + + const DatasetBase* const input_; + const std::vector initial_state_; + const std::unique_ptr captured_func_; + const DataTypeVector state_types_; + const DataTypeVector output_types_; + const std::vector output_shapes_; + }; + + const int graph_def_version_; + DataTypeVector state_types_; + DataTypeVector output_types_; + std::vector output_shapes_; + NameAttrList func_; +}; + +REGISTER_KERNEL_BUILDER(Name("ScanDataset").Device(DEVICE_CPU), ScanDatasetOp); + +} // namespace + +} // namespace tensorflow diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index c0e84c8bb0..ac15a3f71b 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -198,6 +198,21 @@ buffer_size: The maximum number of elements to buffer in an iterator over this dataset. )doc"); +REGISTER_OP("ScanDataset") + .Input("input_dataset: variant") + .Input("initial_state: Tstate") + .Input("other_arguments: Targuments") + .Output("handle: variant") + .Attr("f: func") + .Attr("Tstate: list(type) >= 1") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Creates a dataset successively reduces `f` over the elements of `input_dataset`. +)doc"); + REGISTER_OP("FlatMapDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") -- GitLab From 4878a28ac3e5b63cd820c9aa13cb0c4f0025ec23 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 09:20:06 -0700 Subject: [PATCH 178/909] Update ops-related pbtxt files. 
PiperOrigin-RevId: 171543801 --- .../core/ops/compat/ops_history.v1.pbtxt | 46 ++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 47 +++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index f8667177cc..a449fc1452 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -24743,6 +24743,52 @@ op { } } } +op { + name: "ScanDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "initial_state" + type_list_attr: "Tstate" + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Tstate" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "ScatterAdd" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 9abb4f7a5e..88e57ea0cb 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -23407,6 +23407,53 @@ op { summary: "Outputs a `Summary` protocol buffer with scalar values." description: "The input `tags` and `values` must have the same shape. The generated summary\nhas a summary value for each tag-value pair in `tags` and `values`." 
} +op { + name: "ScanDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "initial_state" + type_list_attr: "Tstate" + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Tstate" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Creates a dataset successively reduces `f` over the elements of `input_dataset`." +} op { name: "ScatterAdd" input_arg { -- GitLab From 022b25cd72af3127180728baf20351630a294609 Mon Sep 17 00:00:00 2001 From: Sylvus Date: Mon, 9 Oct 2017 17:48:14 +0100 Subject: [PATCH 179/909] Fix for the IOU metric (#12709) * Fixed mean iou case when a class does not appear in the labels nor in the prediction. * Added 3 tests for both mean_iou and streaming_mean_iou, 2 of which would fail with the previous code and one to make sure the behavior is still correct in the normal case. Fixed broken tests as well. * Added check for div by 0 in iou metric. * Add space around operator As per style guide. 
--- .../metrics/python/ops/metric_ops_test.py | 54 ++++++++++++++++++- .../python/kernel_tests/metrics_test.py | 51 +++++++++++++++++- tensorflow/python/ops/metrics_impl.py | 14 ++++- 3 files changed, 116 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index 9b959b43a9..0f7f83f764 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -4978,7 +4978,7 @@ class StreamingMeanIOUTest(test.TestCase): sess.run(variables.local_variables_initializer()) for _ in range(5): sess.run(update_op) - desired_output = np.mean([1.0 / 3.0, 2.0 / 4.0, 0.]) + desired_output = np.mean([1.0 / 3.0, 2.0 / 4.0]) self.assertAlmostEqual(desired_output, miou.eval()) def testUpdateOpEvalIsAccumulatedConfusionMatrix(self): @@ -5060,6 +5060,58 @@ class StreamingMeanIOUTest(test.TestCase): desired_miou = np.mean([2. / 4., 4. / 6.]) self.assertAlmostEqual(desired_miou, miou.eval()) + def testMissingClassInLabels(self): + labels = constant_op.constant([ + [[0, 0, 1, 1, 0, 0], + [1, 0, 0, 0, 0, 1]], + [[1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0]]]) + predictions = constant_op.constant([ + [[0, 0, 2, 1, 1, 0], + [0, 1, 2, 2, 0, 1]], + [[0, 0, 2, 1, 1, 1], + [1, 1, 2, 0, 0, 0]]]) + num_classes = 3 + with self.test_session() as sess: + miou, update_op = metrics.streaming_mean_iou( + predictions, labels, num_classes) + sess.run(variables.local_variables_initializer()) + self.assertAllEqual([[7, 4, 3], [3, 5, 2], [0, 0, 0]], update_op.eval()) + self.assertAlmostEqual( + 1 / 3 * (7 / (7 + 3 + 7) + 5 / (5 + 4 + 5) + 0 / (0 + 5 + 0)), + miou.eval()) + + def testMissingClassOverallSmall(self): + labels = constant_op.constant([0]) + predictions = constant_op.constant([0]) + num_classes = 2 + with self.test_session() as sess: + miou, update_op = metrics.streaming_mean_iou( + predictions, labels, num_classes) + 
sess.run(variables.local_variables_initializer()) + self.assertAllEqual([[1, 0], [0, 0]], update_op.eval()) + self.assertAlmostEqual(1, miou.eval()) + + def testMissingClassOverallLarge(self): + labels = constant_op.constant([ + [[0, 0, 1, 1, 0, 0], + [1, 0, 0, 0, 0, 1]], + [[1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0]]]) + predictions = constant_op.constant([ + [[0, 0, 1, 1, 0, 0], + [1, 1, 0, 0, 1, 1]], + [[0, 0, 0, 1, 1, 1], + [1, 1, 1, 0, 0, 0]]]) + num_classes = 3 + with self.test_session() as sess: + miou, update_op = metrics.streaming_mean_iou( + predictions, labels, num_classes) + sess.run(variables.local_variables_initializer()) + self.assertAllEqual([[9, 5, 0], [3, 7, 0], [0, 0, 0]], update_op.eval()) + self.assertAlmostEqual( + 1 / 2 * (9 / (9 + 3 + 5) + 7 / (7 + 5 + 3)), miou.eval()) + class StreamingConcatTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py index 2472b2a2a6..804346e6e7 100644 --- a/tensorflow/python/kernel_tests/metrics_test.py +++ b/tensorflow/python/kernel_tests/metrics_test.py @@ -3331,7 +3331,7 @@ class MeanIOUTest(test.TestCase): sess.run(variables.local_variables_initializer()) for _ in range(5): sess.run(update_op) - desired_output = np.mean([1.0 / 3.0, 2.0 / 4.0, 0.]) + desired_output = np.mean([1.0 / 3.0, 2.0 / 4.0]) self.assertAlmostEqual(desired_output, miou.eval()) def testUpdateOpEvalIsAccumulatedConfusionMatrix(self): @@ -3410,6 +3410,55 @@ class MeanIOUTest(test.TestCase): desired_miou = np.mean([2. / 4., 4. 
/ 6.]) self.assertAlmostEqual(desired_miou, miou.eval()) + def testMissingClassInLabels(self): + labels = constant_op.constant([ + [[0, 0, 1, 1, 0, 0], + [1, 0, 0, 0, 0, 1]], + [[1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0]]]) + predictions = constant_op.constant([ + [[0, 0, 2, 1, 1, 0], + [0, 1, 2, 2, 0, 1]], + [[0, 0, 2, 1, 1, 1], + [1, 1, 2, 0, 0, 0]]]) + num_classes = 3 + with self.test_session() as sess: + miou, update_op = metrics.mean_iou(labels, predictions, num_classes) + sess.run(variables.local_variables_initializer()) + self.assertAllEqual([[7, 4, 3], [3, 5, 2], [0, 0, 0]], update_op.eval()) + self.assertAlmostEqual( + 1 / 3 * (7 / (7 + 3 + 7) + 5 / (5 + 4 + 5) + 0 / (0 + 5 + 0)), + miou.eval()) + + def testMissingClassOverallSmall(self): + labels = constant_op.constant([0]) + predictions = constant_op.constant([0]) + num_classes = 2 + with self.test_session() as sess: + miou, update_op = metrics.mean_iou(labels, predictions, num_classes) + sess.run(variables.local_variables_initializer()) + self.assertAllEqual([[1, 0], [0, 0]], update_op.eval()) + self.assertAlmostEqual(1, miou.eval()) + + def testMissingClassOverallLarge(self): + labels = constant_op.constant([ + [[0, 0, 1, 1, 0, 0], + [1, 0, 0, 0, 0, 1]], + [[1, 1, 1, 1, 1, 1], + [0, 0, 0, 0, 0, 0]]]) + predictions = constant_op.constant([ + [[0, 0, 1, 1, 0, 0], + [1, 1, 0, 0, 1, 1]], + [[0, 0, 0, 1, 1, 1], + [1, 1, 1, 0, 0, 0]]]) + num_classes = 3 + with self.test_session() as sess: + miou, update_op = metrics.mean_iou(labels, predictions, num_classes) + sess.run(variables.local_variables_initializer()) + self.assertAllEqual([[9, 5, 0], [3, 7, 0], [0, 0, 0]], update_op.eval()) + self.assertAlmostEqual( + 1 / 2 * (9 / (9 + 3 + 5) + 7 / (7 + 5 + 3)), miou.eval()) + class MeanPerClassAccuracyTest(test.TestCase): diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 4c3ebb3aae..c40273b047 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ 
b/tensorflow/python/ops/metrics_impl.py @@ -949,6 +949,12 @@ def mean_iou(labels, cm_diag = math_ops.to_float(array_ops.diag_part(total_cm)) denominator = sum_over_row + sum_over_col - cm_diag + # The mean is only computed over classes that appear in the + # label or prediction tensor. If the denominator is 0, we need to + # ignore the class. + num_valid_entries = math_ops.reduce_sum(math_ops.cast( + math_ops.not_equal(denominator, 0), dtype=dtypes.float32)) + # If the value of the denominator is 0, set it to 1 to avoid # zero division. denominator = array_ops.where( @@ -956,7 +962,13 @@ def mean_iou(labels, denominator, array_ops.ones_like(denominator)) iou = math_ops.div(cm_diag, denominator) - return math_ops.reduce_mean(iou, name=name) + + # If the number of valid entries is 0 (no classes) we return 0. + result = array_ops.where( + math_ops.greater(num_valid_entries, 0), + math_ops.reduce_sum(iou, name=name) / num_valid_entries, + 0) + return result mean_iou_v = compute_mean_iou('mean_iou') -- GitLab From 7e2b50d8490f573b470ca97bd06a4677830db738 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 09:45:16 -0700 Subject: [PATCH 180/909] Update docs of MomentumOptimizer about use_nesterov and of RMSProp about momentum PiperOrigin-RevId: 171546603 --- tensorflow/python/training/momentum.py | 5 ++++- tensorflow/python/training/rmsprop.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/training/momentum.py b/tensorflow/python/training/momentum.py index f34ff22f07..7c00e219fd 100644 --- a/tensorflow/python/training/momentum.py +++ b/tensorflow/python/training/momentum.py @@ -53,7 +53,10 @@ class MomentumOptimizer(optimizer.Optimizer): gradients. Defaults to "Momentum". use_nesterov: If `True` use Nesterov Momentum. See [Sutskever et al., 2013]( - http://jmlr.org/proceedings/papers/v28/sutskever13.pdf) + http://jmlr.org/proceedings/papers/v28/sutskever13.pdf). 
+ This implementation always computes gradients at the value of the + variable(s) passed to the optimizer. Using Nesterov Momentum makes the + variable(s) track the values called `theta_t + mu*v_t` in the paper. """ super(MomentumOptimizer, self).__init__(use_locking, name) diff --git a/tensorflow/python/training/rmsprop.py b/tensorflow/python/training/rmsprop.py index d046456c85..ebec725b7b 100644 --- a/tensorflow/python/training/rmsprop.py +++ b/tensorflow/python/training/rmsprop.py @@ -26,6 +26,8 @@ mean_square = decay * mean_square{t-1} + (1-decay) * gradient ** 2 mom = momentum * mom{t-1} + learning_rate * g_t / sqrt(mean_square + epsilon) delta = - mom +This implementation of RMSProp uses plain momentum, not Nesterov momentum. + The centered version additionally maintains a moving (discounted) average of the gradients, and uses that average to estimate the variance: -- GitLab From 5bba158bbeea684c3e87de28a61004dbef28e00d Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Mon, 9 Oct 2017 10:07:05 -0700 Subject: [PATCH 181/909] Print numpy value for variables when in Eager mode PiperOrigin-RevId: 171549468 --- tensorflow/python/framework/ops.py | 24 ++++++++++--------- .../python/kernel_tests/variables_test.py | 2 +- tensorflow/python/ops/variables.py | 10 +++++--- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index e6e6b9c6ca..0257f094d7 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -174,6 +174,17 @@ def uid(): return c_api.TFE_Py_UID() +def numpy_text(tensor, is_repr=False): + """Human readable representation of a tensor's numpy value.""" + if tensor.dtype.is_numpy_compatible: + text = repr(tensor.numpy()) if is_repr else str(tensor.numpy()) + else: + text = "" + if "\n" in text: + text = "\n" + text + return text + + # NOTE(ebrevdo): Do not subclass this. If you do, I will break you on purpose. 
class _TensorLike(object): """Internal cls for grouping Tensor, SparseTensor, ..., for is_instance.""" @@ -590,15 +601,6 @@ class _EagerTensorBase(Tensor): # performance-sensitive in some models. return dtypes._INTERN_TABLE[self._datatype_enum()] # pylint: disable=protected-access - def _numpy_text(self, is_repr=False): - if self.dtype.is_numpy_compatible: - numpy_text = repr(self.numpy()) if is_repr else str(self.numpy()) - else: - numpy_text = "" - if "\n" in numpy_text: - numpy_text = "\n" + numpy_text - return numpy_text - def numpy(self): """Returns a numpy array with the same contents as the Tensor. @@ -640,13 +642,13 @@ class _EagerTensorBase(Tensor): raise NotImplementedError() def __str__(self): - return "tf.Tensor(%s, shape=%s, dtype=%s)" % (self._numpy_text(), + return "tf.Tensor(%s, shape=%s, dtype=%s)" % (numpy_text(self), self.shape, self.dtype.name) def __repr__(self): return "" % ( - self._id, self.shape, self.dtype.name, self._numpy_text(is_repr=True)) + self._id, self.shape, self.dtype.name, numpy_text(self, is_repr=True)) @staticmethod def _override_operator(name, func): diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py index 7718710c69..f60ebf58f6 100644 --- a/tensorflow/python/kernel_tests/variables_test.py +++ b/tensorflow/python/kernel_tests/variables_test.py @@ -504,7 +504,7 @@ class VariablesTestCase(test.TestCase): self.assertAllClose(np.ones((5, 5), np.float32), var.eval()) def testRepr(self): - var = variables.Variable(np.zeros((5, 5), np.float32), name='noop') + var = variables.Variable(np.zeros((5, 5), np.float32), name="noop") self.assertEqual( "", repr(var)) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index a27f26e303..90b4f25d81 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -213,9 +213,13 @@ class Variable(object): constraint=constraint) def __repr__(self): - return "" % (self.name, - 
self.get_shape(), - self.dtype.name) + if context.in_eager_mode(): + return "" % ( + self.name, self.get_shape(), self.dtype.name, + ops.numpy_text(self.read_value(), is_repr=True)) + else: + return "" % ( + self.name, self.get_shape(), self.dtype.name) def _init_from_args(self, initial_value=None, -- GitLab From ff8019199722f516968ba2867c7f090dc73a734f Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Mon, 9 Oct 2017 10:27:18 -0700 Subject: [PATCH 182/909] Estimator.predict should not generate warning if user uses TF dataset. PiperOrigin-RevId: 171552443 --- tensorflow/python/estimator/BUILD | 1 + tensorflow/python/estimator/estimator.py | 27 +++++++++-- tensorflow/python/estimator/estimator_test.py | 46 ++++++++++++++++++- 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 22de474013..2040d45cb6 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -406,6 +406,7 @@ py_test( "//tensorflow/python:training", "//tensorflow/python:util", "//tensorflow/python:variables", + "//tensorflow/python/data", "//tensorflow/python/ops/losses", "//tensorflow/python/saved_model:loader", "//tensorflow/python/saved_model:tag_constants", diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 1197366256..4dfc53aadf 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -51,6 +51,7 @@ from tensorflow.python.training import saver from tensorflow.python.training import training from tensorflow.python.training import training_util from tensorflow.python.util import compat +from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect @@ -564,13 +565,16 @@ class Estimator(object): return export_dir def _get_features_from_input_fn(self, input_fn, mode): + """Extracts the `features` from return values of `input_fn`.""" result = 
self._call_input_fn(input_fn, mode) - if not ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS): - logging.warning('Input graph does not contain a QueueRunner. ' - 'That means predict yields forever. ' - 'This is probably a mistake.') if isinstance(result, (list, tuple)): - return result[0] + # Unconditionally drop the label (the second element of result). + result = result[0] + + if not _has_dataset_or_queue_runner(result): + logging.warning('Input graph does not use tf.data.Dataset or contain a ' + 'QueueRunner. That means predict yields forever. ' + 'This is probably a mistake.') return result def _get_features_and_labels_from_input_fn(self, input_fn, mode): @@ -1005,3 +1009,16 @@ def _write_dict_to_summary(output_dir, key) summary_writer.add_summary(summary_proto, current_global_step) summary_writer.flush() + + +def _has_dataset_or_queue_runner(maybe_tensor): + """Returns True if TF dataset or QueueRunner has been used.""" + # Check TF dataset first. Here, we use a simple algorithm to check the top + # level Tensors only, which should be sufficient for most users. + tensors = [x for x in nest.flatten(maybe_tensor) if isinstance(x, ops.Tensor)] + if any([t.op.type == 'IteratorGetNext' for t in tensors]): + return True + + # Now, check queue. 
+ return ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS) + diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index cdffe3378f..0040ec3650 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -29,6 +29,7 @@ import six from google.protobuf import text_format from tensorflow.python.client import session +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator import estimator from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator import run_config @@ -1212,7 +1213,50 @@ class EstimatorPredictTest(test.TestCase): next(est.predict(dummy_input_fn)) self.assertRegexpMatches( str(mock_log.call_args), - 'Input graph does not contain a QueueRunner.') + 'Input graph does not.*contain a QueueRunner.') + + def test_skip_warn_if_dataset_returns_features(self): + + def _model_fn(features, labels, mode): + _, _ = features, labels + return model_fn_lib.EstimatorSpec( + mode, + loss=constant_op.constant(0.), + train_op=state_ops.assign_add(training.get_global_step(), 1), + predictions=constant_op.constant([[10.]])) + + def _input_fn(): + it = dataset_ops.Dataset.from_tensors([1]).make_one_shot_iterator() + return it.get_next() + + est = estimator.Estimator(model_fn=_model_fn) + est.train(dummy_input_fn, steps=1) + with test.mock.patch.object(logging, 'warning') as mock_log: + next(est.predict(_input_fn)) + # The warning should not have keyword QueueRunner. 
+ self.assertRegexpMatches(str(mock_log.call_args), '^((?!QueueRunner).)*$') + + def test_skip_warn_if_dataset_returns_features_dict(self): + + def _model_fn(features, labels, mode): + _, _ = features, labels + return model_fn_lib.EstimatorSpec( + mode, + loss=constant_op.constant(0.), + train_op=state_ops.assign_add(training.get_global_step(), 1), + predictions=constant_op.constant([[10.]])) + + def _input_fn(): + it = dataset_ops.Dataset.from_tensors([1]).make_one_shot_iterator() + features = {'age': it.get_next()} + return features + + est = estimator.Estimator(model_fn=_model_fn) + est.train(dummy_input_fn, steps=1) + with test.mock.patch.object(logging, 'warning') as mock_log: + next(est.predict(_input_fn)) + # The warning should not have keyword QueueRunner. + self.assertRegexpMatches(str(mock_log.call_args), '^((?!QueueRunner).)*$') def test_input_fn_can_return_just_features(self): -- GitLab From 9ff05e9e7f471a8487cdd8a7bb6fdd554055e2dd Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 9 Oct 2017 10:48:57 -0700 Subject: [PATCH 183/909] Fixing the name of the disabled test. 
(#13593) --- tensorflow/contrib/cmake/tf_tests.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 4cf22a9c47..0e61cd6539 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -229,7 +229,7 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/kernel_tests/cholesky_op_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/diag_op_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/linalg_ops_test.py" - "${tensorflow_source_dir}/tensorflow/python/ops/init_ops.py" + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/init_ops_test.py" "${tensorflow_source_dir}/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py" # misc "${tensorflow_source_dir}/tensorflow/python/kernel_tests/variable_scope_test.py" -- GitLab From 15dd5fd0b2e0b39d87b1cb873ae84225d86173db Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Mon, 9 Oct 2017 11:00:55 -0700 Subject: [PATCH 184/909] Track persistent memory in constant op. 
PiperOrigin-RevId: 171557547 --- tensorflow/core/kernels/BUILD | 2 +- tensorflow/core/kernels/constant_op.cc | 12 +++- tensorflow/core/kernels/constant_op_test.cc | 65 +++++++++++++++++++++ 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index a3aa905415..ad6f84304d 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -900,7 +900,7 @@ tf_cc_test( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "constant_op_test", size = "small", srcs = ["constant_op_test.cc"], diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index 618d4f580b..018ace5485 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -54,7 +54,17 @@ ConstantOp::ConstantOp(OpKernelConstruction* ctx) DataTypeString(ctx->output_type(0)), ")")); } -void ConstantOp::Compute(OpKernelContext* ctx) { ctx->set_output(0, tensor_); } +void ConstantOp::Compute(OpKernelContext* ctx) { + ctx->set_output(0, tensor_); + if (TF_PREDICT_FALSE(ctx->track_allocations())) { + AllocatorAttributes attr; + if (ctx->allocate_on_host(attr)) { + ctx->record_host_persistent_memory_allocation(tensor_.AllocatedBytes()); + } else { + ctx->record_device_persistent_memory_allocation(tensor_.AllocatedBytes()); + } + } +} ConstantOp::~ConstantOp() {} diff --git a/tensorflow/core/kernels/constant_op_test.cc b/tensorflow/core/kernels/constant_op_test.cc index 2d44140b72..62cc67c736 100644 --- a/tensorflow/core/kernels/constant_op_test.cc +++ b/tensorflow/core/kernels/constant_op_test.cc @@ -14,17 +14,82 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { +class ConstantOpTest : public OpsTestBase { + protected: + void PersistentMemoryTrackingTest(bool on_gpu); +}; + +void ConstantOpTest::PersistentMemoryTrackingTest(bool on_gpu) { + DataType data_type = DT_INT32; + std::initializer_list dims = {2, 3, 4, 5}; + Tensor tensor(data_type, TensorShape(dims)); + for (int i = 0; i < 2 * 3 * 4 * 5; ++i) { + tensor.flat()(i) = i; + } + + NodeDef const_node; + TF_ASSERT_OK(NodeDefBuilder("some_node", "Const") + .Attr("dtype", data_type) + .Attr("value", tensor) + .Finalize(&const_node)); + + string device_string = "CPU"; + DeviceType device_type = DEVICE_CPU; + if (on_gpu) { + device_string = "GPU"; + DeviceType device_type = DEVICE_GPU; + } + std::unique_ptr device(DeviceFactory::NewDevice( + device_string, {}, "/job:worker/replica:0/task:0")); + + Status status; + std::unique_ptr op(CreateOpKernel(device_type, device.get(), + cpu_allocator(), const_node, + TF_GRAPH_DEF_VERSION, &status)); + + OpKernelContext::Params params; + params.device = device.get(); + params.frame_iter = FrameAndIter(0, 0); + params.op_kernel = op.get(); + params.track_allocations = true; + + OpKernelContext ctx(¶ms); + op->Compute(&ctx); + TF_EXPECT_OK(ctx.status()); + + if (on_gpu) { + EXPECT_EQ(ctx.device_persistent_memory_allocated(), 512); + } else { 
+ EXPECT_EQ(ctx.host_persistent_memory_allocated(), 480); + } + + // Remove memry leak errors. + for (auto allocator_pair : ctx.wrapped_allocators()) { + allocator_pair.second->GetRecordsAndUnRef(); + } +} + +TEST_F(ConstantOpTest, PersistentMemoryTracking) { + PersistentMemoryTrackingTest(false); +#if GOOGLE_CUDA + PersistentMemoryTrackingTest(true); +#endif // GOOGLE_CUDA +} + // Returns graph containing "num" const nodes. If 'sequential' is // true, make sure all constants are executed sequentially in the // graph by adding control dependencies. -- GitLab From e56628b085ffa7922e5238537f6ebd6deee0f0cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 11:30:51 -0700 Subject: [PATCH 185/909] [TF:XLA] Rename ComputationBuilder::LogicalX to X PiperOrigin-RevId: 171562764 --- .../compiler/tf2xla/kernels/binary_ops.cc | 4 ++-- .../compiler/tf2xla/kernels/random_ops.cc | 2 +- .../compiler/tf2xla/kernels/reduction_ops.cc | 4 ++-- tensorflow/compiler/tf2xla/kernels/relu_op.cc | 6 ++--- .../compiler/tf2xla/kernels/softmax_op.cc | 2 +- .../compiler/tf2xla/kernels/unary_ops.cc | 8 +++---- .../xla/client/computation_builder.cc | 6 ++--- .../compiler/xla/client/computation_builder.h | 6 ++--- .../compiler/xla/client/lib/arithmetic.cc | 4 ++-- .../xla/tests/array_elementwise_ops_test.cc | 24 +++++++++---------- .../xla/tests/broadcast_simple_test.cc | 4 ++-- .../xla/tests/scalar_computations_test.cc | 14 +++++------ tensorflow/compiler/xla/tests/while_test.cc | 4 ++-- 13 files changed, 43 insertions(+), 45 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc index 58538b4513..a180f1e4d9 100644 --- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc @@ -96,8 +96,8 @@ static xla::ComputationDataHandle FloorModImpl(xla::ComputationBuilder* b, XLA_MAKE_BINARY(FloorMod, FloorModImpl(b, input_type(0), lhs, rhs, broadcast_helper)); 
-XLA_MAKE_BINARY(LogicalAnd, b->LogicalAnd(lhs, rhs, extend_dimensions)); -XLA_MAKE_BINARY(LogicalOr, b->LogicalOr(lhs, rhs, extend_dimensions)); +XLA_MAKE_BINARY(LogicalAnd, b->And(lhs, rhs, extend_dimensions)); +XLA_MAKE_BINARY(LogicalOr, b->Or(lhs, rhs, extend_dimensions)); XLA_MAKE_BINARY(Mod, b->Rem(lhs, rhs, extend_dimensions)); XLA_MAKE_BINARY(Maximum, b->Max(lhs, rhs, extend_dimensions)); XLA_MAKE_BINARY(Minimum, b->Min(lhs, rhs, extend_dimensions)); diff --git a/tensorflow/compiler/tf2xla/kernels/random_ops.cc b/tensorflow/compiler/tf2xla/kernels/random_ops.cc index 66b99665cb..2421825ead 100644 --- a/tensorflow/compiler/tf2xla/kernels/random_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/random_ops.cc @@ -140,7 +140,7 @@ class TruncatedNormalOp : public XlaOpKernel { xla::ComputationBuilder* b) { xla::ComputationDataHandle too_large = b->Gt(candidate, two_sd(false, b)); xla::ComputationDataHandle too_small = b->Lt(candidate, two_sd(true, b)); - return b->LogicalOr(too_large, too_small); + return b->Or(too_large, too_small); }; // The algorithm we're using is roughly: diff --git a/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc b/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc index dae2eb9d2a..647b627408 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/reduction_ops.cc @@ -129,7 +129,7 @@ class AllOp : public XlaReductionOp { void BuildReducer(xla::ComputationBuilder* builder, const xla::ComputationDataHandle& scalar_lhs, const xla::ComputationDataHandle& scalar_rhs) override { - builder->LogicalAnd(scalar_lhs, scalar_rhs); + builder->And(scalar_lhs, scalar_rhs); } }; @@ -147,7 +147,7 @@ class AnyOp : public XlaReductionOp { void BuildReducer(xla::ComputationBuilder* builder, const xla::ComputationDataHandle& scalar_lhs, const xla::ComputationDataHandle& scalar_rhs) override { - builder->LogicalOr(scalar_lhs, scalar_rhs); + builder->Or(scalar_lhs, scalar_rhs); } }; diff --git 
a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index a137d28118..12a3552999 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -77,9 +77,9 @@ class Relu6GradOp : public XlaOpKernel { b->Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes()); const auto six = b->Broadcast( XlaHelpers::IntegerLiteral(b, input_type(0), 6), shape.dim_sizes()); - auto out = b->Select( - b->LogicalAnd(b->Lt(ctx->Input(1), six), b->Gt(ctx->Input(1), zero)), - ctx->Input(0), zero); + auto out = + b->Select(b->And(b->Lt(ctx->Input(1), six), b->Gt(ctx->Input(1), zero)), + ctx->Input(0), zero); ctx->SetOutput(0, out); } }; diff --git a/tensorflow/compiler/tf2xla/kernels/softmax_op.cc b/tensorflow/compiler/tf2xla/kernels/softmax_op.cc index a0d8ab4d73..750a4c2dec 100644 --- a/tensorflow/compiler/tf2xla/kernels/softmax_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/softmax_op.cc @@ -202,7 +202,7 @@ class SparseSoftmaxXentWithLogitsOp : public XlaOpKernel { // NaN otherwise; then add that vector to the labels to force out-of-range // values to NaNs. xla::ComputationDataHandle nan_or_zero = builder->Select( - builder->LogicalAnd( + builder->And( builder->Le(XlaHelpers::Zero(builder, indices_type), indices), builder->Lt(indices, XlaHelpers::IntegerLiteral( builder, indices_type, depth))), diff --git a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc index 3e4a0f5950..8f04fc94be 100644 --- a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc @@ -87,7 +87,7 @@ XLAJIT_MAKE_UNARY(Log, b->Log(x)); // TODO(b/34703906): use a more accurate implementation of log1p. 
XLAJIT_MAKE_UNARY(Log1p, b->Log(b->Add(XlaHelpers::One(b, input_type(0)), x))); -XLAJIT_MAKE_UNARY(LogicalNot, b->LogicalNot(x)); +XLAJIT_MAKE_UNARY(LogicalNot, b->Not(x)); XLAJIT_MAKE_UNARY(Neg, b->Neg(x)); // Implements Banker's rounding: numbers that are equidistant between two @@ -104,9 +104,9 @@ static xla::ComputationDataHandle Round(xla::ComputationBuilder* b, auto nearest_even_int = b->Sub(round_val, b->Mul(two, b->Floor(b->Mul(half, x)))); auto is_odd = b->Eq(nearest_even_int, one); - return b->Select(b->LogicalOr(b->Gt(fraction, half), - b->LogicalAnd(b->Eq(fraction, half), is_odd)), - b->Add(round_val, one), round_val); + return b->Select( + b->Or(b->Gt(fraction, half), b->And(b->Eq(fraction, half), is_odd)), + b->Add(round_val, one), round_val); } XLAJIT_MAKE_UNARY(Rint, Round(b, input_type(0), x)); diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 925dcd36c0..4757e8b0d2 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -955,19 +955,19 @@ ComputationDataHandle ComputationBuilder::Min( return BinaryOp(BINOP_MIN, lhs, rhs, broadcast_dimensions); } -ComputationDataHandle ComputationBuilder::LogicalAnd( +ComputationDataHandle ComputationBuilder::And( const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { return BinaryOp(BINOP_LOGICAL_AND, lhs, rhs, broadcast_dimensions); } -ComputationDataHandle ComputationBuilder::LogicalOr( +ComputationDataHandle ComputationBuilder::Or( const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { return BinaryOp(BINOP_LOGICAL_OR, lhs, rhs, broadcast_dimensions); } -ComputationDataHandle ComputationBuilder::LogicalNot( +ComputationDataHandle ComputationBuilder::Not( const ComputationDataHandle& operand) { return UnaryOp(UNOP_LOGICAL_NOT, 
operand); } diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 7014685ea5..23769f0afc 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -461,15 +461,15 @@ class ComputationBuilder { tensorflow::gtl::ArraySlice broadcast_dimensions = {}); // Element-wise logical operators - ComputationDataHandle LogicalAnd( + ComputationDataHandle And( const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions = {}); - ComputationDataHandle LogicalOr( + ComputationDataHandle Or( const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions = {}); - ComputationDataHandle LogicalNot(const ComputationDataHandle& lhs); + ComputationDataHandle Not(const ComputationDataHandle& operand); // Reduces an array among the provided dimensions, given "computation" as a // reduction operator. 
diff --git a/tensorflow/compiler/xla/client/lib/arithmetic.cc b/tensorflow/compiler/xla/client/lib/arithmetic.cc index 969b0eee1d..99e9f2dbb2 100644 --- a/tensorflow/compiler/xla/client/lib/arithmetic.cc +++ b/tensorflow/compiler/xla/client/lib/arithmetic.cc @@ -93,14 +93,14 @@ Computation CreateScalarLogicalAndComputation(ComputationBuilder* builder) { return CreateScalarComputation( "logical_and", PRED, builder, [](ComputationBuilder* b, const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs) { return b->LogicalAnd(lhs, rhs); }); + const ComputationDataHandle& rhs) { return b->And(lhs, rhs); }); } Computation CreateScalarLogicalOrComputation(ComputationBuilder* builder) { return CreateScalarComputation( "logical_or", PRED, builder, [](ComputationBuilder* b, const ComputationDataHandle& lhs, - const ComputationDataHandle& rhs) { return b->LogicalOr(lhs, rhs); }); + const ComputationDataHandle& rhs) { return b->Or(lhs, rhs); }); } StatusOr Any(const ComputationDataHandle& predicates, diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 24bccf6863..08b39b6379 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -496,54 +496,54 @@ XLA_TEST_F(ArrayElementwiseOpTest, MulTwoConstantU32s) { ComputeAndCompareR1(&builder, expected, {}); } -XLA_TEST_F(ArrayElementwiseOpTest, LogicalAnd) { +XLA_TEST_F(ArrayElementwiseOpTest, BooleanAnd) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR1({false, false, true, true}); auto b = builder.ConstantR1({false, true, false, true}); - auto out = builder.LogicalAnd(a, b); + auto out = builder.And(a, b); ComputeAndCompareR1(&builder, {false, false, false, true}, {}); } -XLA_TEST_F(ArrayElementwiseOpTest, LogicalAndZeroElement) { +XLA_TEST_F(ArrayElementwiseOpTest, BooleanAndZeroElement) { ComputationBuilder builder(client_, 
TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto out = builder.LogicalAnd(a, b); + auto out = builder.And(a, b); ComputeAndCompareR1(&builder, {}, {}); } -XLA_TEST_F(ArrayElementwiseOpTest, LogicalOr) { +XLA_TEST_F(ArrayElementwiseOpTest, BooleanOr) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR1({false, false, true, true}); auto b = builder.ConstantR1({false, true, false, true}); - auto out = builder.LogicalOr(a, b); + auto out = builder.Or(a, b); ComputeAndCompareR1(&builder, {false, true, true, true}, {}); } -XLA_TEST_F(ArrayElementwiseOpTest, LogicalOrZeroElement) { +XLA_TEST_F(ArrayElementwiseOpTest, BooleanOrZeroElement) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR1({}); auto b = builder.ConstantR1({}); - auto out = builder.LogicalOr(a, b); + auto out = builder.Or(a, b); ComputeAndCompareR1(&builder, {}, {}); } -XLA_TEST_F(ArrayElementwiseOpTest, LogicalNot) { +XLA_TEST_F(ArrayElementwiseOpTest, BooleanNot) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR1({false, true, true, false}); - auto out = builder.LogicalNot(a); + auto out = builder.Not(a); ComputeAndCompareR1(&builder, {true, false, false, true}, {}); } -XLA_TEST_F(ArrayElementwiseOpTest, LogicalNotZeroElement) { +XLA_TEST_F(ArrayElementwiseOpTest, BooleanNotZeroElement) { ComputationBuilder builder(client_, TestName()); auto a = builder.ConstantR1({}); - auto out = builder.LogicalNot(a); + auto out = builder.Not(a); ComputeAndCompareR1(&builder, {}, {}); } diff --git a/tensorflow/compiler/xla/tests/broadcast_simple_test.cc b/tensorflow/compiler/xla/tests/broadcast_simple_test.cc index 505fa059f2..03f5e08315 100644 --- a/tensorflow/compiler/xla/tests/broadcast_simple_test.cc +++ b/tensorflow/compiler/xla/tests/broadcast_simple_test.cc @@ -159,7 +159,7 @@ XLA_TEST_F(BroadcastSimpleTest, 1DTo2D) { } // Tests implicit broadcasting of PREDs. 
-XLA_TEST_F(BroadcastSimpleTest, LogicalAnd2DTo3D_Pred) { +XLA_TEST_F(BroadcastSimpleTest, BooleanAnd2DTo3D_Pred) { ComputationBuilder b(client_, TestName()); Array2D x_vals(2, 1); @@ -174,7 +174,7 @@ XLA_TEST_F(BroadcastSimpleTest, LogicalAnd2DTo3D_Pred) { ComputationDataHandle x, y; auto x_data = CreateR2Parameter(x_vals, 0, "x", &b, &x); auto y_data = CreateR3Parameter(y_vals, 1, "y", &b, &y); - b.LogicalAnd(x, y, /*broadcast_dimensions=*/{1, 2}); + b.And(x, y, /*broadcast_dimensions=*/{1, 2}); Array3D expected(2, 2, 1); expected(0, 0, 0) = false; diff --git a/tensorflow/compiler/xla/tests/scalar_computations_test.cc b/tensorflow/compiler/xla/tests/scalar_computations_test.cc index 77d1c019f3..da84d185ca 100644 --- a/tensorflow/compiler/xla/tests/scalar_computations_test.cc +++ b/tensorflow/compiler/xla/tests/scalar_computations_test.cc @@ -459,34 +459,32 @@ XLA_TEST_F(ScalarComputationsTest, RemTwoScalarsU32) { ComputeAndCompareR0(&builder, 2, {}); } -XLA_TEST_F(ScalarComputationsTest, LogicalAnd) { +XLA_TEST_F(ScalarComputationsTest, BooleanAnd) { for (bool x : {false, true}) { for (bool y : {false, true}) { ComputationBuilder builder(client_, TestName()); - builder.LogicalAnd(builder.ConstantR0(x), - builder.ConstantR0(y)); + builder.And(builder.ConstantR0(x), builder.ConstantR0(y)); ComputeAndCompareR0(&builder, x && y, {}); } } } -XLA_TEST_F(ScalarComputationsTest, LogicalOr) { +XLA_TEST_F(ScalarComputationsTest, BooleanOr) { for (bool x : {false, true}) { for (bool y : {false, true}) { ComputationBuilder builder(client_, TestName()); - builder.LogicalOr(builder.ConstantR0(x), - builder.ConstantR0(y)); + builder.Or(builder.ConstantR0(x), builder.ConstantR0(y)); ComputeAndCompareR0(&builder, x || y, {}); } } } -XLA_TEST_F(ScalarComputationsTest, LogicalNot) { +XLA_TEST_F(ScalarComputationsTest, BooleanNot) { for (bool x : {false, true}) { ComputationBuilder builder(client_, TestName()); - builder.LogicalNot(builder.ConstantR0(x)); + 
builder.Not(builder.ConstantR0(x)); ComputeAndCompareR0(&builder, !x, {}); } diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc index bb2d90fa94..71a1b0abee 100644 --- a/tensorflow/compiler/xla/tests/while_test.cc +++ b/tensorflow/compiler/xla/tests/while_test.cc @@ -169,7 +169,7 @@ TEST_F(WhileTest, WhileWithPredicateResult) { { ComputationBuilder builder(client_, "body"); auto prev = builder.Parameter(0, result_shape, "prev"); - auto result = builder.LogicalOr(prev, builder.ConstantR0(true)); + auto result = builder.Or(prev, builder.ConstantR0(true)); body = builder.Build().ConsumeValueOrDie(); } @@ -437,7 +437,7 @@ TEST_F(WhileTest, WhileWithPredicateTupleResult) { auto prev = builder.Parameter(0, result_shape, "prev"); auto iteration = builder.GetTupleElement(prev, 0); auto pred = builder.GetTupleElement(prev, 1); - auto new_pred = builder.LogicalOr(pred, builder.ConstantR0(true)); + auto new_pred = builder.Or(pred, builder.ConstantR0(true)); auto result = builder.Tuple( {builder.Add(iteration, builder.ConstantR0(1)), new_pred}); body = builder.Build().ConsumeValueOrDie(); -- GitLab From 4a97a8210ce31fe9a3081a3afacdf12f2feeefad Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Mon, 9 Oct 2017 12:02:17 -0700 Subject: [PATCH 186/909] Validate input shapes for the graph_callable decorator PiperOrigin-RevId: 171567580 --- tensorflow/python/eager/graph_callable.py | 30 +++++++++++++++---- .../python/eager/graph_callable_test.py | 14 +++++++++ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index e3aacbd140..a1bdba6e4e 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -19,7 +19,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections import contextlib from tensorflow.python.eager 
import context @@ -241,15 +240,27 @@ class _InitializingFunctionObject(object): from the graph, which might not be possible in general. """ - def __init__(self, call_fn, init_fn): + def __init__(self, call_fn, init_fn, shape_and_dtypes): self._init_fn = init_fn self._call_fn = call_fn + self.shape_and_dtypes = shape_and_dtypes + self.flattened_shapes = [tensor_shape.as_shape(sd.shape) for sd in + nest.flatten(self.shape_and_dtypes)] @property def variables(self): return self._call_fn.variables def __call__(self, *args): + nest.assert_same_structure(self.shape_and_dtypes, args, check_types=False) + if not all([ + shape.is_compatible_with(arg.shape) + for shape, arg in zip(self.flattened_shapes, nest.flatten(args)) + ]): + raise ValueError( + "Declared shapes do not match argument shapes: Expected %s, found %s." + % (self.flattened_shapes, [arg.shape for arg in nest.flatten(args)])) + initialized = [resource_variable_ops.var_is_initialized_op( v.handle).numpy() for v in self._call_fn.variables] if all(x for x in initialized): @@ -398,12 +409,19 @@ def _graph_callable_internal(func, shape_and_dtypes): function._map_sequence_obj_to_idx(capture_func_def_outputs), # pylint: disable=protected-access output_shapes) - return _InitializingFunctionObject(captured_function, initializer_function) + return _InitializingFunctionObject(captured_function, initializer_function, + shape_and_dtypes) + + +class ShapeAndDtype(object): + """Data type that packages together shape and type information. + Used for arguments to graph callables. See graph_callable() for an example. + """ -# Data type that packages together shape and type information for arguments to -# graph callables. See graph_callable() for an example. 
-ShapeAndDtype = collections.namedtuple("ShapeAndDtype", ["shape", "dtype"]) + def __init__(self, shape, dtype): + self.shape = shape + self.dtype = dtype def graph_callable(shape_and_dtypes): diff --git a/tensorflow/python/eager/graph_callable_test.py b/tensorflow/python/eager/graph_callable_test.py index 104e019391..57e1a062e1 100644 --- a/tensorflow/python/eager/graph_callable_test.py +++ b/tensorflow/python/eager/graph_callable_test.py @@ -219,6 +219,20 @@ class GraphCallableTest(test.TestCase): my_function() + def testIncorrectlyShapedInputs(self): + @graph_callable.graph_callable( + [graph_callable.ShapeAndDtype(shape=(3), dtype=dtypes.float32)]) + def my_function(x): + v = variable_scope.get_variable( + "v", initializer=init_ops.zeros_initializer(), shape=()) + return v + x + + with self.assertRaises(ValueError): + my_function([1, 2]) + + self.assertTrue(([1, 2, 3] == my_function( + constant_op.constant([1, 2, 3], dtype=dtypes.float32)).numpy()).all()) + if __name__ == "__main__": test.main() -- GitLab From 8ed8e220017c13049490d2c4188e1eaf3ab068b0 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 9 Oct 2017 12:08:22 -0700 Subject: [PATCH 187/909] Make ops_test.py work with the C API enabled. This mostly involves adding custom ops to the test_ops library to replace the ad-hoc ops previously used in the tests (it's not possible to create new ops on the fly using the C API). In addition, this change modifies importer_test.py to use the new custom ops as well. 
PiperOrigin-RevId: 171568617 --- tensorflow/python/framework/importer_test.py | 213 ++----- tensorflow/python/framework/ops.py | 21 +- tensorflow/python/framework/ops_test.py | 625 ++++++++++--------- tensorflow/python/framework/test_ops.cc | 161 +++++ 4 files changed, 557 insertions(+), 463 deletions(-) diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index 8ce8e76629..e447f9a3e8 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -29,9 +29,7 @@ from tensorflow.python.framework import device from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import importer -from tensorflow.python.framework import op_def_registry from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_ops # pylint: disable=unused-import from tensorflow.python.framework import versions from tensorflow.python.ops import array_ops @@ -44,117 +42,6 @@ import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test -def _UnknownShape(op): - return [tensor_shape.unknown_shape() for _ in op.outputs] - - -# NOTE(cwhipkey): Dummy shape registration for ops used in the tests, since they -# don't have C++ op registrations on which to attach C++ shape fns. 
-ops.RegisterShape("If")(_UnknownShape) -ops.RegisterShape("Iff")(_UnknownShape) -ops.RegisterShape("Ii")(_UnknownShape) -ops.RegisterShape("Iif")(_UnknownShape) -ops.RegisterShape("Iii")(_UnknownShape) -ops.RegisterShape("In")(_UnknownShape) -ops.RegisterShape("Iri")(_UnknownShape) -ops.RegisterShape("None")(_UnknownShape) -ops.RegisterShape("Of")(_UnknownShape) -ops.RegisterShape("Oi")(_UnknownShape) -ops.RegisterShape("Oif")(_UnknownShape) -ops.RegisterShape("Oii")(_UnknownShape) -ops.RegisterShape("OpWithDefaultAttr")(_UnknownShape) -ops.RegisterShape("OpWithFutureDefaultAttr")(_UnknownShape) -ops.RegisterShape("Or")(_UnknownShape) -ops.RegisterShape("Otl")(_UnknownShape) -ops.RegisterShape("Unary")(_UnknownShape) - -_op_list = op_def_pb2.OpList() -text_format.Merge(""" - op { - name: 'None' - } - op { - name: 'Oi' - output_arg { name: 'a' type: DT_INT32 } - } - op { - name: 'Or' - output_arg { name: 'a' type: DT_INT32 is_ref: true } - } - op { - name: 'Of' - output_arg { name: 'a' type: DT_FLOAT } - } - op { - name: 'Ii' - input_arg { name: 'a' type: DT_INT32 } - } - op { - name: 'If' - input_arg { name: 'a' type: DT_FLOAT } - } - op { - name: 'Oii' - output_arg { name: 'a' type: DT_INT32 } - output_arg { name: 'b' type: DT_INT32 } - } - op { - name: 'Oif' - output_arg { name: 'a' type: DT_INT32 } - output_arg { name: 'b' type: DT_FLOAT } - } - op { - name: 'Iii' - input_arg { name: 'a' type: DT_INT32 } - input_arg { name: 'b' type: DT_INT32 } - } - op { - name: 'Iff' - input_arg { name: 'a' type: DT_FLOAT } - input_arg { name: 'b' type: DT_FLOAT } - } - op { - name: 'Iif' - input_arg { name: 'a' type: DT_INT32 } - input_arg { name: 'b' type: DT_FLOAT } - } - op { - name: 'Iri' - input_arg { name: 'a' type: DT_INT32 is_ref: true } - input_arg { name: 'b' type: DT_INT32 } - } - op { - name: 'In' - input_arg { name: 'a' number_attr: 'N' type_attr: 'T' } - attr { name: 'N' type: 'int' minimum: 1 } - attr { name: 'T' type: 'type' } - } - op { - name: 'Otl' - 
output_arg { name: 'a' type_list_attr: 't' } - attr { name: 'T' type: 'list(type)' minimum: 1 } - } - op { - name: 'Unary' - input_arg { name: 'a' type_attr: 'T' } - output_arg { name: 'b' type_attr: 'T' } - attr { name: 'T' type: 'type' } - } - op { - name: 'OpWithDefaultAttr' - output_arg { name: 'a' type: DT_INT32 } - attr { name: 'default_float' type: 'float' default_value { f: 123.0 } } - } - op { - name: 'OpWithFutureDefaultAttr' - } -""", _op_list) -op_def_registry.register_op_list(_op_list) -# NOTE(mrry): Dummy shape registrations for ops used in the tests. -for op_def in _op_list.op: - ops.RegisterShape(op_def.name)(None) - - class ImportGraphDefTest(test.TestCase): def _MakeGraphDef(self, @@ -172,15 +59,15 @@ class ImportGraphDefTest(test.TestCase): with ops.Graph().as_default(): a, b, c, d = importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oif' } - node { name: 'B' op: 'Otl' - attr { key: 't' + node { name: 'A' op: 'IntOutputFloatOutput' } + node { name: 'B' op: 'ListOutput' + attr { key: 'T' value { list { type: DT_INT32 type: DT_FLOAT } } } } - node { name: 'C' op: 'In' + node { name: 'C' op: 'ListInput' attr { key: 'N' value { i: 2 } } attr { key: 'T' value { type: DT_INT32 } } input: 'A:0' input: 'B:0' } - node { name: 'D' op: 'In' + node { name: 'D' op: 'ListInput' attr { key: 'N' value { i: 2 } } attr { key: 'T' value { type: DT_FLOAT } } input: 'A:1' input: 'B:1' } @@ -203,10 +90,10 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(d.inputs[1], b.outputs[1]) # Check the types of the returned ops and tensors. 
- self.assertEqual(a.type, "Oif") - self.assertEqual(b.type, "Otl") - self.assertEqual(c.type, "In") - self.assertEqual(d.type, "In") + self.assertEqual(a.type, "IntOutputFloatOutput") + self.assertEqual(b.type, "ListOutput") + self.assertEqual(c.type, "ListInput") + self.assertEqual(d.type, "ListInput") self.assertEqual(a.outputs[0].dtype, dtypes.int32) self.assertEqual(a.outputs[1].dtype, dtypes.float32) self.assertEqual(b.outputs[0].dtype, dtypes.int32) @@ -228,13 +115,13 @@ class ImportGraphDefTest(test.TestCase): a, b, c, d = importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oii' } - node { name: 'B' op: 'Oii' } - node { name: 'C' op: 'In' + node { name: 'A' op: 'TwoIntOutputs' } + node { name: 'B' op: 'TwoIntOutputs' } + node { name: 'C' op: 'ListInput' attr { key: 'N' value { i: 2 } } attr { key: 'T' value { type: DT_INT32 } } input: 'A:0' input: 'B:0' } - node { name: 'D' op: 'In' + node { name: 'D' op: 'ListInput' attr { key: 'N' value { i: 2 } } attr { key: 'T' value { type: DT_INT32 } } input: 'A:1' input: 'B:1' } @@ -255,13 +142,13 @@ class ImportGraphDefTest(test.TestCase): a, b, c, d = importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oii' } - node { name: 'B' op: 'Oii' } - node { name: 'C' op: 'In' + node { name: 'A' op: 'TwoIntOutputs' } + node { name: 'B' op: 'TwoIntOutputs' } + node { name: 'C' op: 'ListInput' attr { key: 'N' value { i: 2 } } attr { key: 'T' value { type: DT_INT32 } } input: 'A:0' input: 'B:0' } - node { name: 'D' op: 'In' + node { name: 'D' op: 'ListInput' attr { key: 'N' value { i: 2 } } attr { key: 'T' value { type: DT_INT32 } } input: 'A:1' input: 'B:1' } @@ -282,13 +169,13 @@ class ImportGraphDefTest(test.TestCase): a, b, c, d = importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oii' } - node { name: 'B' op: 'Oii' } - node { name: 'C' op: 'In' + node { name: 'A' op: 'TwoIntOutputs' } + node { name: 'B' op: 'TwoIntOutputs' } + node { name: 'C' op: 
'ListInput' attr { key: 'N' value { i: 2 } } attr { key: 'T' value { type: DT_INT32 } } input: 'A:0' input: 'B:0' } - node { name: 'D' op: 'In' + node { name: 'D' op: 'ListInput' attr { key: 'N' value { i: 2 } } attr { key: 'T' value { type: DT_INT32 } } input: 'A:1' input: 'B:1' } @@ -306,8 +193,8 @@ class ImportGraphDefTest(test.TestCase): with ops.Graph().as_default(): a, b = importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oii' } - node { name: 'B' op: 'Ii' input: 'A' } + node { name: 'A' op: 'TwoIntOutputs' } + node { name: 'B' op: 'IntInput' input: 'A' } """), return_elements=["A", "B"]) @@ -318,8 +205,8 @@ class ImportGraphDefTest(test.TestCase): feed_a_0 = constant_op.constant(0, dtype=dtypes.int32) b, = importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oii' } - node { name: 'B' op: 'Ii' input: 'A:0' } + node { name: 'A' op: 'TwoIntOutputs' } + node { name: 'B' op: 'IntInput' input: 'A:0' } """), input_map={"A": feed_a_0}, return_elements=["B"]) @@ -341,10 +228,10 @@ class ImportGraphDefTest(test.TestCase): with ops.Graph().as_default(): a, b, c, d = importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Or' } - node { name: 'B' op: 'Oi' } - node { name: 'C' op: 'Iii' input: 'A:0' input: 'B:0' } - node { name: 'D' op: 'Iri' input: 'A:0' input: 'B:0' } + node { name: 'A' op: 'RefOutput' } + node { name: 'B' op: 'IntOutput' } + node { name: 'C' op: 'TwoIntInputs' input: 'A:0' input: 'B:0' } + node { name: 'D' op: 'RefInputIntInput' input: 'A:0' input: 'B:0' } """), return_elements=["A", "B", "C", "D"]) @@ -378,8 +265,8 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oi' } - node { name: 'B' op: 'If' input: 'A:0' } + node { name: 'A' op: 'IntOutput' } + node { name: 'B' op: 'FloatInput' input: 'A:0' } """)) self.assertTrue( "Cannot convert a tensor of type int32 to an input of type 
float" in @@ -405,7 +292,7 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: _ = importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Of' } + node { name: 'A' op: 'FloatOutput' } node { name: 'B' op: 'L2Loss' input: 'A:0' attr { key: 'T' value { type: DT_FLOAT } } @@ -422,7 +309,7 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oi' } + node { name: 'A' op: 'IntOutput' } node { name: 'B' op: 'None' input: 'A:0' } """)) self.assertTrue("More inputs specified ('A:0') than the op expects" in @@ -433,8 +320,8 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oi' } - node { name: 'B' op: 'Iif' input: 'A:0' } + node { name: 'A' op: 'IntOutput' } + node { name: 'B' op: 'IntInputFloatInput' input: 'A:0' } """)) self.assertTrue("Input types mismatch (expected 'int32, float32' but " "got 'int32')" in str(e.exception)) @@ -444,7 +331,7 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'B' op: 'If' input: 'A:0' } + node { name: 'B' op: 'FloatInput' input: 'A:0' } """)) self.assertTrue("Input tensor 'A:0' not found" in str(e.exception)) @@ -453,7 +340,7 @@ class ImportGraphDefTest(test.TestCase): feed_a_0 = constant_op.constant(5.0) b, = importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'B' op: 'If' input: 'A:0' } + node { name: 'B' op: 'FloatInput' input: 'A:0' } """), input_map={"A:0": feed_a_0}, return_elements=["B"]) @@ -464,8 +351,8 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Of' } - node { name: 'B' op: 'If' input: 'A:1' } + node { name: 'A' op: 'FloatOutput' } + node { name: 
'B' op: 'FloatInput' input: 'A:1' } """)) self.assertTrue("Input tensor 'A:1' not found" in str(e.exception)) @@ -514,7 +401,7 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oi' } + node { name: 'A' op: 'IntOutput' } """), return_elements=["A:1"]) self.assertTrue( @@ -523,7 +410,7 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oi' } + node { name: 'A' op: 'IntOutput' } """), return_elements=["B:0"]) self.assertTrue( @@ -532,7 +419,7 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oi' } + node { name: 'A' op: 'IntOutput' } """), return_elements=["A:B:0"]) self.assertTrue( @@ -553,7 +440,7 @@ class ImportGraphDefTest(test.TestCase): # Mapping an unused node output should succeed. 
importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oi' } + node { name: 'A' op: 'IntOutput' } """), input_map={"A:0": constant_op.constant(5.0)}) @@ -561,7 +448,7 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oi' } + node { name: 'A' op: 'IntOutput' } """), input_map={"A:2": constant_op.constant(5.0)}) self.assertTrue("not found in graph_def: [A:2]" in str(e.exception)) @@ -571,8 +458,8 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oi' } - node { name: 'B' op: 'Ii' input: 'A:0' } + node { name: 'A' op: 'IntOutput' } + node { name: 'B' op: 'IntInput' input: 'A:0' } """), input_map={"A:0": constant_op.constant(5.0)}) self.assertTrue( @@ -826,9 +713,9 @@ class ImportGraphDefTest(test.TestCase): with self.assertRaises(ValueError) as e: importer.import_graph_def( self._MakeGraphDef(""" - node { name: 'A' op: 'Oi' } - node { name: 'B' op: 'Oi' } - node { name: 'A' op: 'Oi' } + node { name: 'A' op: 'IntOutput' } + node { name: 'B' op: 'IntOutput' } + node { name: 'A' op: 'IntOutput' } """)) self.assertEqual("Duplicate name 'A' in GraphDef.", str(e.exception)) @@ -962,7 +849,7 @@ class ImportGraphDefTest(test.TestCase): with ops.Graph().as_default(): a, = importer.import_graph_def( self._MakeGraphDef( - "node { name: 'A' op: 'Oii' }", + "node { name: 'A' op: 'TwoIntOutputs' }", producer=producer, min_consumer=min_consumer), return_elements=["A"]) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 0257f094d7..669588ace0 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -415,6 +415,7 @@ class Tensor(_TensorLike): ValueError: If `shape` is not compatible with the current shape of this tensor. 
""" + # TODO(skyewm): call C API self._shape = self._shape.merge_with(shape) @property @@ -1873,6 +1874,7 @@ class Operation(object): """The list of `Tensor` objects representing the data inputs of this op.""" if self._c_op: tf_outputs = c_api.GetOperationInputs(self._c_op) + # TODO(skyewm): return Operation._InputList # pylint: disable=protected-access return [self.graph._get_tensor_by_tf_output(tf_output) for tf_output in tf_outputs] @@ -4340,14 +4342,17 @@ class _DefaultStack(threading.local): self.stack.append(default) yield default finally: - if self._enforce_nesting: - if self.stack[-1] is not default: - raise AssertionError( - "Nesting violated for default stack of %s objects" % - type(default)) - self.stack.pop() - else: - self.stack.remove(default) + # stack may be empty if reset() was called + if self.stack: + if self._enforce_nesting: + if self.stack[-1] is not default: + raise AssertionError( + "Nesting violated for default stack of %s objects" % + type(default)) + self.stack.pop() + else: + self.stack.remove(default) + _default_session_stack = _DefaultStack() # pylint: disable=protected-access diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 5c39dc192e..9ef7f59529 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -51,6 +51,7 @@ from tensorflow.python.util import compat ops._set_call_cpp_shape_fn(common_shapes.call_cpp_shape_fn) +@test_util.with_c_api class ResourceTest(test_util.TensorFlowTestCase): def testBuildGraph(self): @@ -76,11 +77,12 @@ class ResourceTest(test_util.TensorFlowTestCase): resources.shared_resources()).eval()), 0) +@test_util.with_c_api class TensorTest(test_util.TensorFlowTestCase): def testShape(self): op = ops.Operation( - ops._NodeDef("noop", "myop"), ops.Graph(), [], [dtypes.float32]) + ops._NodeDef("FloatOutput", "myop"), ops.Graph(), [], [dtypes.float32]) t = op.outputs[0] self.assertEqual(tensor_shape.unknown_shape(), 
t.get_shape()) t.set_shape([1, 2, 3]) @@ -88,7 +90,7 @@ class TensorTest(test_util.TensorFlowTestCase): def testIterable(self): op = ops.Operation( - ops._NodeDef("noop", "myop"), ops.Graph(), [], [dtypes.float32]) + ops._NodeDef("FloatOutput", "myop"), ops.Graph(), [], [dtypes.float32]) t = op.outputs[0] self.assertTrue(isinstance(t, ops.Tensor)) with self.assertRaisesRegexp(TypeError, "not iterable"): @@ -96,6 +98,7 @@ class TensorTest(test_util.TensorFlowTestCase): pass +@test_util.with_c_api class IndexedSlicesTest(test_util.TensorFlowTestCase): def testToTensor(self): @@ -124,11 +127,12 @@ class IndexedSlicesTest(test_util.TensorFlowTestCase): self.assertAllEqual(x.indices.eval(), [0, 2]) +@test_util.with_c_api class NodeDefConstructorTest(test_util.TensorFlowTestCase): def testNoArgs(self): - nodedef = ops._NodeDef("noop", "bar") - self.assertProtoEquals("op: 'noop' name: 'bar'", nodedef) + nodedef = ops._NodeDef("None", "bar") + self.assertProtoEquals("op: 'None' name: 'bar'", nodedef) def testArgs(self): nodedef = ops._NodeDef("foo", "bar", device="/device:baz:*") @@ -138,23 +142,6 @@ class NodeDefConstructorTest(test_util.TensorFlowTestCase): self.assertProtoEquals("op:'foo' name:'bar' device:'/job:j'", nodedef) -# NOTE(mrry): Dummy shape registrations for ops used in the tests, since they -# don't have C++ op registrations on which to attach C++ shape fns. 
-ops.RegisterShape("a")(common_shapes.unknown_shape) -ops.RegisterShape("b")(common_shapes.unknown_shape) -ops.RegisterShape("c")(common_shapes.unknown_shape) -ops.RegisterShape("add")(common_shapes.unknown_shape) -ops.RegisterShape("an_op")(common_shapes.unknown_shape) -ops.RegisterShape("const")(common_shapes.unknown_shape) -ops.RegisterShape("copy")(common_shapes.unknown_shape) -ops.RegisterShape("foo")(common_shapes.unknown_shape) -ops.RegisterShape("identity")(common_shapes.unknown_shape) -ops.RegisterShape("mul")(common_shapes.unknown_shape) -ops.RegisterShape("nonrefop")(common_shapes.unknown_shape) -ops.RegisterShape("noop")(common_shapes.unknown_shape) -ops.RegisterShape("refop")(common_shapes.unknown_shape) - - def _apply_op(g, *args, **kwargs): op = g.create_op(*args, **kwargs) if len(op.outputs) == 1: @@ -163,12 +150,11 @@ def _apply_op(g, *args, **kwargs): return op.outputs +@test_util.with_c_api class OperationTest(test_util.TensorFlowTestCase): def testNoInputs(self): - op = ops.Operation( - ops._NodeDef("noop", "myop"), - ops.Graph(), [], [dtypes.float32, dtypes.string]) + op = test_ops.float_output_string_output(name="myop").a.op self.assertEqual(2, len(op.values())) self.assertEqual(0, len(op.inputs)) self.assertEqual("myop", op.name) @@ -186,13 +172,13 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertEqual(0, len(label_str_t._consumers)) self.assertEqual("myop:1", label_str_t._as_node_def_input()) - self.assertProtoEquals("op:'noop' name:'myop'", op.node_def) + self.assertProtoEquals("op:'FloatOutputStringOutput' name:'myop'", + op.node_def) def testNoOutputs(self): - g = ops.Graph() - op1 = ops.Operation(ops._NodeDef("noop", "myop1"), g, [], [dtypes.float32]) + op1 = test_ops.float_output(name="myop1").op float_t, = op1.values() - op2 = ops.Operation(ops._NodeDef("reop", "myop2"), g, [float_t], []) + op2 = test_ops.float_input(float_t, name="myop2") self.assertEqual(0, len(op2.values())) self.assertEqual(1, len(op2.inputs)) 
self.assertIs(float_t, op2.inputs[0]) @@ -200,24 +186,21 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertEqual(1, len(float_t._consumers)) self.assertEqual(op2, float_t._consumers[0]) - self.assertProtoEquals("op:'noop' name:'myop1'", op1.node_def) - self.assertProtoEquals("op:'reop' name:'myop2' input:'myop1'", op2.node_def) + self.assertProtoEquals("op:'FloatOutput' name:'myop1'", op1.node_def) + self.assertProtoEquals("op:'FloatInput' name:'myop2' input:'myop1'", + op2.node_def) def testInputsAndOutputs(self): - g = ops.Graph() - op1 = ops.Operation(ops._NodeDef("noop", "myop1"), g, [], [dtypes.float32]) + op1 = test_ops.float_output(name="myop1").op self.assertEqual(1, len(op1.values())) float1_t, = op1.values() - op2 = ops.Operation( - ops._NodeDef("reop", "myop2"), g, [], [dtypes.float32, dtypes.string]) + op2 = test_ops.float_output_string_output(name="myop2").a.op self.assertEqual(2, len(op2.values())) float2_t, label2_str_t = op2.values() # Note that we consume label2_str_t twice here. 
- op3 = ops.Operation( - ops._NodeDef("add", "myop3"), g, [float1_t, label2_str_t, label2_str_t], - [dtypes.float32, dtypes.int32]) + op3 = test_ops.foo2(float1_t, label2_str_t, label2_str_t, name="myop3").d.op self.assertEqual(2, len(op3.values())) self.assertEqual(1, len(float1_t._consumers)) @@ -230,40 +213,42 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertEqual(op3, label2_str_t._consumers[1]) self.assertProtoEquals(""" - op:'add' name:'myop3' + op:'Foo2' name:'myop3' input:'myop1' input:'myop2:1' input:'myop2:1' """, op3.node_def) def testDeviceObject(self): - op = ops.Operation(ops._NodeDef("noop", "myop"), ops.Graph(), [], []) + op = ops.Operation(ops._NodeDef("None", "myop"), ops.Graph(), [], []) op._set_device("/job:goo/device:GPU:0") self.assertProtoEquals( - "op:'noop' name:'myop' device:'/job:goo/device:GPU:0' ", op.node_def) - op = ops.Operation(ops._NodeDef("noop", "op2"), ops.Graph(), [], []) + "op:'None' name:'myop' device:'/job:goo/device:GPU:0' ", op.node_def) + op = ops.Operation(ops._NodeDef("None", "op2"), ops.Graph(), [], []) op._set_device( pydev.DeviceSpec( job="muu", device_type="CPU", device_index=0)) self.assertProtoEquals( - "op:'noop' name:'op2' device:'/job:muu/device:CPU:0'", op.node_def) + "op:'None' name:'op2' device:'/job:muu/device:CPU:0'", op.node_def) def testReferenceInput(self): g = ops.Graph() op1 = ops.Operation( - ops._NodeDef("noop", "op1"), g, [], + ops._NodeDef("RefOutputFloatOutput", "op1"), g, [], [dtypes.float32_ref, dtypes.float32]) - self.assertProtoEquals("op:'noop' name:'op1'", op1.node_def) + self.assertProtoEquals("op:'RefOutputFloatOutput' name:'op1'", op1.node_def) ref_t, nonref_t = op1.values() # NOTE(mrry): Must specify input_types to preserve ref-typed input. 
op2 = ops.Operation( - ops._NodeDef("refop", "op2"), + ops._NodeDef("RefInputFloatInput", "op2"), g, [ref_t, nonref_t], [], input_types=[dtypes.float32_ref, dtypes.float32]) - self.assertProtoEquals("op:'refop' name:'op2' input:'op1' input:'op1:1'", - op2.node_def) + self.assertProtoEquals( + "op:'RefInputFloatInput' name:'op2' input:'op1' input:'op1:1'", + op2.node_def) op3 = ops.Operation( - ops._NodeDef("nonrefop", "op3"), g, [ref_t, nonref_t], []) - self.assertProtoEquals("op:'nonrefop' name:'op3' input:'op1' input:'op1:1'", - op3.node_def) + ops._NodeDef("TwoFloatInputs", "op3"), g, [ref_t, nonref_t], []) + self.assertProtoEquals( + "op:'TwoFloatInputs' name:'op3' input:'op1' input:'op1:1'", + op3.node_def) def testInvalidNames(self): g = ops.Graph() @@ -279,10 +264,8 @@ class OperationTest(test_util.TensorFlowTestCase): ops.Operation(ops._NodeDef("op", "invalid:0"), g) def testNoShapeFunction(self): - g = ops.Graph() - ops.Operation(ops._NodeDef("op", "an_op"), g, output_types=[dtypes.float32]) - self.assertEqual(tensor_shape.unknown_shape(), - _apply_op(g, "an_op", [], [dtypes.float32]).get_shape()) + op = test_ops.a() + self.assertEqual(tensor_shape.unknown_shape(), op.get_shape()) def testConvertToTensorNestedArray(self): with self.test_session(): @@ -364,22 +347,25 @@ class OperationTest(test_util.TensorFlowTestCase): ops.convert_to_tensor(op) def testStr(self): - node_def = ops._NodeDef("noop", "op1") + node_def = ops._NodeDef("None", "op1") op = ops.Operation(node_def, ops.Graph(), [], [dtypes.float32]) self.assertEqual(str(node_def), str(op)) def testRepr(self): op = ops.Operation( - ops._NodeDef("noop", "op1"), ops.Graph(), [], [dtypes.float32]) - self.assertEqual("", repr(op)) + ops._NodeDef("None", "op1"), ops.Graph(), [], [dtypes.float32]) + self.assertEqual("", repr(op)) def testGetAttr(self): + # TODO(skyewm): implement get_attr with C API + if ops._USE_C_API: return + list_value = attr_value_pb2.AttrValue.ListValue() 
list_value.type.append(types_pb2.DT_STRING) list_value.type.append(types_pb2.DT_DOUBLE) op = ops.Operation( ops._NodeDef( - "noop", + "None", "op1", attrs={ "value": attr_value_pb2.AttrValue(i=32), @@ -403,7 +389,6 @@ class OperationTest(test_util.TensorFlowTestCase): self.assertEqual([dtypes.string, dtypes.double], l) # TODO(nolivia): test all error cases - @test_util.enable_c_api def testAddControlInput(self): with ops.Graph().as_default(): x = constant_op.constant(1).op @@ -411,8 +396,9 @@ class OperationTest(test_util.TensorFlowTestCase): y._add_control_input(x) # pylint: disable=protected-access self.assertEqual(y.control_inputs, [x]) - @test_util.enable_c_api def testControlInputCycle(self): + # Non-C API path has a different error message + if not ops._USE_C_API: return graph = ops.Graph() with graph.as_default(): z = constant_op.constant(0) @@ -427,7 +413,6 @@ class OperationTest(test_util.TensorFlowTestCase): "Graph is invalid, contains a cycle with 2 nodes"): sess.run(x) - @test_util.enable_c_api def testUpdateInput(self): g = ops.Graph() with g.as_default(): @@ -436,21 +421,20 @@ class OperationTest(test_util.TensorFlowTestCase): z = x + y z.op._update_input(0, y) # pylint: disable=protected-access - self.assertEquals(z.op.inputs, [y, y]) + self.assertEquals(list(z.op.inputs), [y, y]) with session.Session(graph=g) as sess: self.assertEquals(sess.run(z), 4) z.op._update_input(0, x) # pylint: disable=protected-access - self.assertEquals(z.op.inputs, [x, y]) + self.assertEquals(list(z.op.inputs), [x, y]) with session.Session(graph=g) as sess: self.assertEquals(sess.run(z), 3) z.op._update_input(1, y) # pylint: disable=protected-access - self.assertEquals(z.op.inputs, [x, y]) + self.assertEquals(list(z.op.inputs), [x, y]) with session.Session(graph=g) as sess: self.assertEquals(sess.run(z), 3) - @test_util.enable_c_api def testUpdateInputGraphError(self): g_0 = ops.Graph() g_1 = ops.Graph() @@ -464,7 +448,6 @@ class 
OperationTest(test_util.TensorFlowTestCase): # TODO(nolivia): check the shape/type in _update_input() instead of depending # on run to do that. - @test_util.enable_c_api def testUpdateInputTypeError(self): g = ops.Graph() with g.as_default(): @@ -480,34 +463,39 @@ class OperationTest(test_util.TensorFlowTestCase): "with expected int32"): sess.run(z) - # C-API throws the error differently. def testUpdateInputOutOfRange(self): + # C-API throws the error differently. + if ops._USE_C_API: return g = ops.Graph() with g.as_default(): x = constant_op.constant(1) - with self.assertRaises(IndexError): + with self.assertRaisesRegexp(IndexError, "list index out of range"): x.op._update_input(1, x) # pylint: disable=protected-access - @test_util.enable_c_api def testUpdateInputOutOfRangeC(self): + # C-API throws the error differently. + if not ops._USE_C_API: return g = ops.Graph() with g.as_default(): x = constant_op.constant(1) with self.assertRaisesRegexp(errors.OutOfRangeError, - "does not have input 1"): + r"Node 'Const' \(type: 'Const', " + r"num of inputs: 0\) does not have input 1"): x.op._update_input(1, x) # pylint: disable=protected-access +@test_util.with_c_api class CreateOpTest(test_util.TensorFlowTestCase): def testNodeDefArgs(self): g = ops.Graph() - op1 = g.create_op("const", [], [dtypes.float32], None, name="myop1") + op1 = g.create_op("FloatOutput", [], [dtypes.float32], None, name="myop1") with g.device("/device:GPU:0"): op2 = g.create_op( - "add", [], [dtypes.float32, dtypes.string], None, name="myop2") + "FloatOutputStringOutput", [], [dtypes.float32, dtypes.string], None, + name="myop2") op3 = g.create_op( - "foo", + "Foo3", [list(op1.values())[0], list(op2.values())[1], list(op2.values())[0]], [dtypes.float32, dtypes.int32], None, @@ -515,52 +503,57 @@ class CreateOpTest(test_util.TensorFlowTestCase): self.assertDeviceEqual(None, op1.device) self.assertDeviceEqual("/device:GPU:0", op2.device) self.assertDeviceEqual(None, op3.device) - 
self.assertProtoEquals("name:'myop1' op:'const'", op1.node_def) - self.assertProtoEquals("name:'myop2' op:'add' device:'/device:GPU:0'", - op2.node_def) + self.assertProtoEquals("name:'myop1' op:'FloatOutput'", op1.node_def) self.assertProtoEquals( - "name:'myop3' input:'myop1' input:'myop2:1' input:'myop2' op:'foo'", + "name:'myop2' op:'FloatOutputStringOutput' device:'/device:GPU:0'", + op2.node_def) + self.assertProtoEquals( + "name:'myop3' input:'myop1' input:'myop2:1' input:'myop2' op:'Foo3'", op3.node_def) def testReferenceInput(self): g = ops.Graph() op1 = g.create_op( - "noop", [], [dtypes.float32_ref, dtypes.float32], name="op1") - self.assertProtoEquals("op:'noop' name:'op1'", op1.node_def) + "RefOutputFloatOutput", [], [dtypes.float32_ref, dtypes.float32], + name="op1") + self.assertProtoEquals("op:'RefOutputFloatOutput' name:'op1'", op1.node_def) ref_t, nonref_t = op1.values() # NOTE(mrry): Must specify input_types to preserve ref-typed input. op2 = g.create_op( - "refop", [ref_t, nonref_t], [], + "RefInputFloatInput", [ref_t, nonref_t], [], input_types=[dtypes.float32_ref, dtypes.float32], name="op2") - self.assertProtoEquals("op:'refop' name:'op2' input:'op1' input:'op1:1'", - op2.node_def) - op3 = g.create_op("nonrefop", [ref_t, nonref_t], [], name="op3") - self.assertProtoEquals("op:'nonrefop' name:'op3' input:'op1' input:'op1:1'", - op3.node_def) + self.assertProtoEquals( + "op:'RefInputFloatInput' name:'op2' input:'op1' input:'op1:1'", + op2.node_def) + op3 = g.create_op("TwoFloatInputs", [ref_t, nonref_t], [], name="op3") + self.assertProtoEquals( + "op:'TwoFloatInputs' name:'op3' input:'op1' input:'op1:1'", + op3.node_def) def testFinalized(self): g = ops.Graph() g.finalize() with self.assertRaises(RuntimeError): - g.create_op("const", [], [dtypes.float32], None, name="myop1") + g.create_op("FloatOutput", [], [dtypes.float32], None, name="myop1") # Test unfinalize. 
g._unsafe_unfinalize() - g.create_op("const", [], [dtypes.float32], None, name="myop1") + g.create_op("FloatOutput", [], [dtypes.float32], None, name="myop1") +@test_util.with_c_api class ApplyOpTest(test_util.TensorFlowTestCase): def testNodeDefArgs(self): g = ops.Graph() - t1 = _apply_op(g, "const", [], [dtypes.float32], name="myop1") + t1 = _apply_op(g, "FloatOutput", [], [dtypes.float32], name="myop1") with g.device("/device:GPU:0"): t2 = _apply_op( - g, "add", [], [dtypes.float32, dtypes.string], name="myop2") + g, "TwoIntOutputs", [], [dtypes.int32, dtypes.int32], name="myop2") t3 = _apply_op( g, - "foo", [t1, t2[1], t2[0]], [dtypes.float32, dtypes.int32], + "Foo1", [t1, t2[1], t2[0]], [dtypes.float32, dtypes.int32], name="myop3") self.assertTrue(isinstance(t1, ops.Tensor)) self.assertTrue(isinstance(t2, list)) @@ -571,32 +564,39 @@ class ApplyOpTest(test_util.TensorFlowTestCase): self.assertEqual("myop2:1", t2[1]._as_node_def_input()) self.assertEqual("myop3", t3[0]._as_node_def_input()) # Validate that we got the right ops as well - self.assertProtoEquals("name:'myop1' op:'const'", t1.op.node_def) - self.assertProtoEquals("name:'myop2' op:'add' device:'/device:GPU:0'", - t2[0].op.node_def) + self.assertProtoEquals("name:'myop1' op:'FloatOutput'", t1.op.node_def) + self.assertProtoEquals( + "name:'myop2' op:'TwoIntOutputs' device:'/device:GPU:0'", + t2[0].op.node_def) self.assertProtoEquals( - "name:'myop3' input:'myop1' input:'myop2:1' input:'myop2' op:'foo'", + "name:'myop3' input:'myop1' input:'myop2:1' input:'myop2' op:'Foo1'", t3[0].op.node_def) def testReferenceInput(self): g = ops.Graph() ref_t, nonref_t = _apply_op( - g, "noop", [], [dtypes.float32_ref, dtypes.float32], name="op1") - self.assertProtoEquals("op:'noop' name:'op1'", ref_t.op.node_def) + g, "RefOutputFloatOutput", [], [dtypes.float32_ref, dtypes.float32], + name="op1") + self.assertProtoEquals("op:'RefOutputFloatOutput' name:'op1'", + ref_t.op.node_def) # NOTE(mrry): Must specify 
input_types to preserve ref-typed input. out_2 = _apply_op( g, - "refop", [ref_t, nonref_t], [dtypes.int32], + "RefInputFloatInputIntOutput", [ref_t, nonref_t], [dtypes.int32], input_types=[dtypes.float32_ref, dtypes.float32], name="op2") - self.assertProtoEquals("op:'refop' name:'op2' input:'op1' input:'op1:1'", - out_2.op.node_def) + self.assertProtoEquals( + "op:'RefInputFloatInputIntOutput' name:'op2' input:'op1' input:'op1:1'", + out_2.op.node_def) out_3 = _apply_op( - g, "nonrefop", [ref_t, nonref_t], [dtypes.int32], name="op3") - self.assertProtoEquals("op:'nonrefop' name:'op3' input:'op1' input:'op1:1'", - out_3.op.node_def) + g, "TwoFloatInputsIntOutput", [ref_t, nonref_t], [dtypes.int32], + name="op3") + self.assertProtoEquals( + "op:'TwoFloatInputsIntOutput' name:'op3' input:'op1' input:'op1:1'", + out_3.op.node_def) +@test_util.with_c_api class NameStackTest(test_util.TensorFlowTestCase): def testBasics(self): @@ -695,22 +695,27 @@ class NameStackTest(test_util.TensorFlowTestCase): pass +@test_util.with_c_api class NameTest(test_util.TensorFlowTestCase): def testGenerateName(self): g = ops.Graph() - op0 = g.create_op("const", [], [dtypes.float32, dtypes.float32]) - self.assertEqual("const", op0.name) - self.assertEqual("const:0", op0.outputs[0].name) - self.assertEqual("const:1", op0.outputs[1].name) + op0 = g.create_op("TwoFloatOutputs", [], [dtypes.float32, dtypes.float32]) + self.assertEqual("TwoFloatOutputs", op0.name) + self.assertEqual("TwoFloatOutputs:0", op0.outputs[0].name) + self.assertEqual("TwoFloatOutputs:1", op0.outputs[1].name) + + op1 = g.create_op("FloatOutput", [], [dtypes.float32]) + self.assertEqual("FloatOutput", op1.name) + self.assertEqual("FloatOutput:0", op1.outputs[0].name) - op1 = g.create_op("const", [], [dtypes.float32]) - self.assertEqual("const_1", op1.name) - self.assertEqual("const_1:0", op1.outputs[0].name) + op2 = g.create_op("FloatOutput", [], [dtypes.float32]) + self.assertEqual("FloatOutput_1", op2.name) + 
self.assertEqual("FloatOutput_1:0", op2.outputs[0].name) - op2 = g.create_op("const", [], [dtypes.float32], name="my_op") - self.assertEqual("my_op", op2.name) - self.assertEqual("my_op:0", op2.outputs[0].name) + op3 = g.create_op("FloatOutput", [], [dtypes.float32], name="my_op") + self.assertEqual("my_op", op3.name) + self.assertEqual("my_op:0", op3.outputs[0].name) def testNameScope(self): g = ops.Graph() @@ -726,57 +731,60 @@ class NameTest(test_util.TensorFlowTestCase): with g.name_scope("") as empty2: self.assertEqual("", empty2) - self.assertEqual("const", g.create_op("const", [], [dtypes.float32]).name) + self.assertEqual("FloatOutput", + g.create_op("FloatOutput", [], [dtypes.float32]).name) with g.name_scope("bar") as scope: - self.assertEqual("bar/const", - g.create_op("const", [], [dtypes.float32]).name) - self.assertEqual("bar/const_1", - g.create_op("const", [], [dtypes.float32]).name) + self.assertEqual("bar/FloatOutput", + g.create_op("FloatOutput", [], [dtypes.float32]).name) + self.assertEqual("bar/FloatOutput_1", + g.create_op("FloatOutput", [], [dtypes.float32]).name) # If you use the value from "with .. as", that values is used as-is. self.assertEqual( "bar", g.create_op( - "const", [], [dtypes.float32], name=scope).name) + "FloatOutput", [], [dtypes.float32], name=scope).name) with g.name_scope("baz") as scope: with g.name_scope("quux"): - self.assertEqual("baz/quux/const", - g.create_op("const", [], [dtypes.float32]).name) + self.assertEqual("baz/quux/FloatOutput", + g.create_op("FloatOutput", [], [dtypes.float32]).name) # If you use the value from the enclosing "with .. as", nothing is pushed. 
with g.name_scope(scope): - self.assertEqual("baz/const", - g.create_op("const", [], [dtypes.float32]).name) + self.assertEqual("baz/FloatOutput", + g.create_op("FloatOutput", [], [dtypes.float32]).name) self.assertEqual( "baz", g.create_op( - "const", [], [dtypes.float32], name=scope).name) + "FloatOutput", [], [dtypes.float32], name=scope).name) self.assertEqual( "trailing", g.create_op( - "const", [], [dtypes.float32], name="trailing/").name) + "FloatOutput", [], [dtypes.float32], name="trailing/").name) with g.name_scope("bar"): - self.assertEqual("bar_1/const", - g.create_op("const", [], [dtypes.float32]).name) + self.assertEqual("bar_1/FloatOutput", + g.create_op("FloatOutput", [], [dtypes.float32]).name) with g.name_scope("bar/"): - self.assertEqual("bar/const_2", - g.create_op("const", [], [dtypes.float32]).name) + self.assertEqual("bar/FloatOutput_2", + g.create_op("FloatOutput", [], [dtypes.float32]).name) +@test_util.with_c_api class DeviceTest(test_util.TensorFlowTestCase): def testNoDevice(self): g = ops.Graph() - op = g.create_op("an_op", [], [dtypes.float32]) + op = g.create_op("FloatOutput", [], [dtypes.float32]) self.assertDeviceEqual(None, op.device) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" } + node { name: "FloatOutput" op: "FloatOutput" } """, gd) def testDevicePartialString(self): g = ops.Graph() with g.device("/job:worker/replica:2"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" device: "/job:worker/replica:2" } + node { name: "FloatOutput" op: "FloatOutput" + device: "/job:worker/replica:2" } """, gd) def testDeviceFull(self): @@ -785,61 +793,61 @@ class DeviceTest(test_util.TensorFlowTestCase): pydev.DeviceSpec( job="worker", replica=2, task=0, device_type="CPU", device_index=3)): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", 
[], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FloatOutput" op: "FloatOutput" device: "/job:worker/replica:2/task:0/device:CPU:3" } """, gd) def testNesting(self): g = ops.Graph() with g.device("/job:worker/replica:2"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device("/job:worker/replica:3/task:0"): - g.create_op("an_op", [], [dtypes.float32]) - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FloatOutput" op: "FloatOutput" device: "/job:worker/replica:2" } - node { name: "an_op_1" op: "an_op" + node { name: "FloatOutput_1" op: "FloatOutput" device: "/job:worker/replica:3/task:0" } - node { name: "an_op_2" op: "an_op" + node { name: "FloatOutput_2" op: "FloatOutput" device: "/job:worker/replica:2" } """, gd) def testNestingString(self): g = ops.Graph() with g.device("/job:worker/replica:2"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device("/job:worker/replica:3/task:0"): - g.create_op("an_op", [], [dtypes.float32]) - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FloatOutput" op: "FloatOutput" device: "/job:worker/replica:2" } - node { name: "an_op_1" op: "an_op" + node { name: "FloatOutput_1" op: "FloatOutput" device: "/job:worker/replica:3/task:0" } - node { name: "an_op_2" op: "an_op" + node { name: "FloatOutput_2" op: "FloatOutput" device: "/job:worker/replica:2" } """, gd) def testNestingOverrideGpuCpu(self): g = ops.Graph() with 
g.device("/job:worker/replica:2/device:CPU:1"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device("/job:worker/replica:2/device:GPU:2"): - g.create_op("an_op", [], [dtypes.float32]) - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FloatOutput" op: "FloatOutput" device: "/job:worker/replica:2/device:CPU:1" } - node { name: "an_op_1" op: "an_op" + node { name: "FloatOutput_1" op: "FloatOutput" device: "/job:worker/replica:2/device:GPU:2" } - node { name: "an_op_2" op: "an_op" + node { name: "FloatOutput_2" op: "FloatOutput" device: "/job:worker/replica:2/device:CPU:1" } """, gd) @@ -847,27 +855,27 @@ class DeviceTest(test_util.TensorFlowTestCase): g = ops.Graph() with g.device(pydev.merge_device("/device:GPU:0")): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device(pydev.merge_device("/job:worker")): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device(pydev.merge_device("/device:CPU:0")): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device(pydev.merge_device("/job:ps")): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device(pydev.merge_device(None)): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FloatOutput" op: "FloatOutput" device: "/device:GPU:0" } - node { name: "an_op_1" op: "an_op" + node { name: "FloatOutput_1" op: "FloatOutput" device: "/job:worker/device:GPU:0" } - node { name: "an_op_2" op: "an_op" + node { name: 
"FloatOutput_2" op: "FloatOutput" device: "/job:worker/device:CPU:0" } - node { name: "an_op_3" op: "an_op" + node { name: "FloatOutput_3" op: "FloatOutput" device: "/job:ps/device:CPU:0" } - node { name: "an_op_4" op: "an_op" + node { name: "FloatOutput_4" op: "FloatOutput" device: "/job:ps/device:CPU:0" } """, gd) @@ -875,27 +883,27 @@ class DeviceTest(test_util.TensorFlowTestCase): g = ops.Graph() with g.device("/device:GPU:0"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device("/job:worker"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device("/device:CPU:0"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device("/job:ps"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device(""): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FloatOutput" op: "FloatOutput" device: "/device:GPU:0" } - node { name: "an_op_1" op: "an_op" + node { name: "FloatOutput_1" op: "FloatOutput" device: "/job:worker/device:GPU:0" } - node { name: "an_op_2" op: "an_op" + node { name: "FloatOutput_2" op: "FloatOutput" device: "/job:worker/device:CPU:0" } - node { name: "an_op_3" op: "an_op" + node { name: "FloatOutput_3" op: "FloatOutput" device: "/job:ps/device:CPU:0" } - node { name: "an_op_4" op: "an_op" + node { name: "FloatOutput_4" op: "FloatOutput" device: "/job:ps/device:CPU:0" } """, gd) @@ -903,56 +911,56 @@ class DeviceTest(test_util.TensorFlowTestCase): g = ops.Graph() with g.device("/device:GPU:7"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device("/device:GPU:*"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", 
[], [dtypes.float32]) with g.device("/device:CPU:*"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device("/device:CPU:5"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FloatOutput" op: "FloatOutput" device: "/device:GPU:7" } - node { name: "an_op_1" op: "an_op" + node { name: "FloatOutput_1" op: "FloatOutput" device: "/device:GPU:7" } - node { name: "an_op_2" op: "an_op" + node { name: "FloatOutput_2" op: "FloatOutput" device: "/device:CPU:*" } - node { name: "an_op_3" op: "an_op" + node { name: "FloatOutput_3" op: "FloatOutput" device: "/device:CPU:5" } """, gd) def testNoneClearsDefault(self): g = ops.Graph() with g.device("/job:worker/replica:2/device:CPU:1"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device(None): - g.create_op("an_op", [], [dtypes.float32]) - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FloatOutput" op: "FloatOutput" device: "/job:worker/replica:2/device:CPU:1" } - node { name: "an_op_1" op: "an_op" } - node { name: "an_op_2" op: "an_op" + node { name: "FloatOutput_1" op: "FloatOutput" } + node { name: "FloatOutput_2" op: "FloatOutput" device: "/job:worker/replica:2/device:CPU:1" } """, gd) def testNoneIgnoresOuterDeviceFunction(self): g = ops.Graph() with g.device(lambda op: "/job:worker/replica:2/device:CPU:1"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device(None): - g.create_op("an_op", [], [dtypes.float32]) - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) 
+ g.create_op("FloatOutput", [], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FloatOutput" op: "FloatOutput" device: "/job:worker/replica:2/device:CPU:1" } - node { name: "an_op_1" op: "an_op" } - node { name: "an_op_2" op: "an_op" + node { name: "FloatOutput_1" op: "FloatOutput" } + node { name: "FloatOutput_2" op: "FloatOutput" device: "/job:worker/replica:2/device:CPU:1" } """, gd) @@ -968,32 +976,33 @@ class DeviceTest(test_util.TensorFlowTestCase): def testOverwritingBehavior(self): g = ops.Graph() with g.device(self._overwritingDeviceFunction): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device("/job:ps"): # Will be overwritten. - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device(pydev.merge_device("/job:ps")): # Will be overwritten. - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device(None): # Disables overwriting device function with g.device("/job:ps"): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) with g.device(None): # Disables overwriting device function with g.device(pydev.merge_device("/job:ps")): - g.create_op("an_op", [], [dtypes.float32]) + g.create_op("FloatOutput", [], [dtypes.float32]) gd = g.as_graph_def() self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FloatOutput" op: "FloatOutput" device: "/job:overwrite" } - node { name: "an_op_1" op: "an_op" + node { name: "FloatOutput_1" op: "FloatOutput" device: "/job:overwrite" } - node { name: "an_op_2" op: "an_op" + node { name: "FloatOutput_2" op: "FloatOutput" device: "/job:overwrite" } - node { name: "an_op_3" op: "an_op" + node { name: "FloatOutput_3" op: "FloatOutput" device: "/job:ps" } - node { name: "an_op_4" op: "an_op" + node { name: "FloatOutput_4" op: 
"FloatOutput" device: "/job:ps" } """, gd) +@test_util.with_c_api class ObjectWithName(object): def __init__(self, name): @@ -1004,6 +1013,7 @@ class ObjectWithName(object): return self._name +@test_util.with_c_api class CollectionTest(test_util.TensorFlowTestCase): def test_get_collections(self): @@ -1112,18 +1122,10 @@ class CollectionTest(test_util.TensorFlowTestCase): self.assertEqual([90, 100], ops.get_collection("key")) -def an_op(g): - return _apply_op(g, "an_op", [], [dtypes.float32]) - +ops.NotDifferentiable("FloatOutput") -ops.NotDifferentiable("an_op") - -def copy_op(x): - return _apply_op(x.graph, "copy", [x], [x.dtype]) - - -@ops.RegisterGradient("copy") +@ops.RegisterGradient("CopyOp") def _CopyGrad(op, x_grad): # pylint: disable=invalid-name _ = op return x_grad @@ -1135,44 +1137,48 @@ def _CopyOverrideGrad(op, x_grad): # pylint: disable=invalid-name return x_grad +@test_util.with_c_api class RegistrationTest(test_util.TensorFlowTestCase): def testRegisterGradients(self): - g = ops.Graph() - x = an_op(g) - y = copy_op(x) + x = test_ops.float_output() + y = test_ops.copy_op(x) fn = ops.get_gradient_function(y.op) self.assertEqual(_CopyGrad, fn) def testOverrideGradients(self): g = ops.Graph() - x = an_op(g) - with g.gradient_override_map({"copy": "copy_override"}): - y = copy_op(x) - fn = ops.get_gradient_function(y.op) - self.assertEqual(_CopyOverrideGrad, fn) + with g.as_default(): + x = test_ops.float_output() + with g.gradient_override_map({"CopyOp": "copy_override"}): + y = test_ops.copy_op(x) + fn = ops.get_gradient_function(y.op) + self.assertEqual(_CopyOverrideGrad, fn) def testNonExistentOverride(self): g = ops.Graph() - x = an_op(g) - with g.gradient_override_map({"copy": "unknown_override"}): - y = copy_op(x) - with self.assertRaisesRegexp(LookupError, "unknown_override"): - ops.get_gradient_function(y.op) + with g.as_default(): + x = test_ops.float_output() + with g.gradient_override_map({"CopyOp": "unknown_override"}): + y = 
test_ops.copy_op(x) + with self.assertRaisesRegexp(LookupError, "unknown_override"): + ops.get_gradient_function(y.op) +@test_util.with_c_api class ComparisonTest(test_util.TensorFlowTestCase): def testMembershipAllowed(self): g = ops.Graph() - t1 = _apply_op(g, "const", [], [dtypes.float32], name="myop1") - t2 = _apply_op(g, "const", [], [dtypes.float32], name="myop2") + t1 = _apply_op(g, "FloatOutput", [], [dtypes.float32], name="myop1") + t2 = _apply_op(g, "FloatOutput", [], [dtypes.float32], name="myop2") self.assertTrue(isinstance(t1, ops.Tensor)) self.assertTrue(isinstance(t2, ops.Tensor)) self.assertTrue(t1 in [t1]) self.assertTrue(t1 not in [t2]) +@test_util.with_c_api class ControlDependenciesTest(test_util.TensorFlowTestCase): @test_util.enable_c_api @@ -1198,7 +1204,7 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase): def testBasicWithConversion(self): g = ops.Graph() - a = _apply_op(g, "const", [], [dtypes.float32]) + a = _apply_op(g, "FloatOutput", [], [dtypes.float32]) class ConvertibleObj(object): @@ -1206,25 +1212,25 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase): return a with g.control_dependencies([ConvertibleObj()]): - c = _apply_op(g, "const", [], [dtypes.float32]) + c = _apply_op(g, "FloatOutput", [], [dtypes.float32]) self.assertEqual(c.op.control_inputs, [a.op]) def testNested(self): g = ops.Graph() - a_1 = _apply_op(g, "const", [], [dtypes.float32]) - a_2 = _apply_op(g, "const", [], [dtypes.float32]) - a_3 = _apply_op(g, "const", [], [dtypes.float32]) - a_4 = _apply_op(g, "const", [], [dtypes.float32]) + a_1 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) + a_2 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) + a_3 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) + a_4 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) with g.control_dependencies([a_1, a_2, a_3, a_4]): - b_1 = _apply_op(g, "const", [], [dtypes.float32]) + b_1 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) with 
g.control_dependencies([a_1]): with g.control_dependencies([a_2]): with g.control_dependencies([a_3]): with g.control_dependencies([a_4]): - b_2 = _apply_op(g, "const", [], [dtypes.float32]) + b_2 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) self.assertItemsEqual([a_1.op, a_2.op, a_3.op, a_4.op], b_1.op.control_inputs) @@ -1232,10 +1238,10 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase): def testClear(self): g = ops.Graph() - a_1 = _apply_op(g, "const", [], [dtypes.float32]) - a_2 = _apply_op(g, "const", [], [dtypes.float32]) - a_3 = _apply_op(g, "const", [], [dtypes.float32]) - a_4 = _apply_op(g, "const", [], [dtypes.float32]) + a_1 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) + a_2 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) + a_3 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) + a_4 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) with g.control_dependencies([a_1]): with g.control_dependencies([a_2]): @@ -1243,18 +1249,18 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase): with g.control_dependencies([a_3]): with g.control_dependencies([a_4]): # deps [a_3, a_4] - b_3_4 = _apply_op(g, "const", [], [dtypes.float32]) + b_3_4 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) # deps = [a_3] - b_3 = _apply_op(g, "const", [], [dtypes.float32]) + b_3 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) # deps back to None - b_none = _apply_op(g, "const", [], [dtypes.float32]) + b_none = _apply_op(g, "FloatOutput", [], [dtypes.float32]) # deps back to [a_1, a_2] - b_1_2 = _apply_op(g, "const", [], [dtypes.float32]) + b_1_2 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) # deps back to [a_1] - b_1 = _apply_op(g, "const", [], [dtypes.float32]) + b_1 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) with g.control_dependencies(None): # deps are None again - b_none2 = _apply_op(g, "const", [], [dtypes.float32]) + b_none2 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) self.assertItemsEqual([a_3.op, a_4.op], 
b_3_4.op.control_inputs) self.assertItemsEqual([a_3.op], b_3.op.control_inputs) @@ -1274,31 +1280,46 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase): # * Nodes d_i are defined as Mul(b_i, c_i) at each scope. # * Nodes e_i are defined as Mul(e_i-1, e_i-1) at each scope i > 1. - a_1 = _apply_op(g, "const", [], [dtypes.float32]) - a_2 = _apply_op(g, "const", [], [dtypes.float32]) - a_3 = _apply_op(g, "const", [], [dtypes.float32]) - a_4 = _apply_op(g, "const", [], [dtypes.float32]) + a_1 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) + a_2 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) + a_3 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) + a_4 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) with g.control_dependencies([a_1]): - b_1 = _apply_op(g, "mul", [a_3, a_4], [dtypes.float32]) - c_1 = _apply_op(g, "mul", [a_1, b_1], [dtypes.float32]) - d_1 = _apply_op(g, "mul", [b_1, c_1], [dtypes.float32]) - e_1 = _apply_op(g, "const", [], [dtypes.float32]) + b_1 = _apply_op(g, "TwoFloatInputsFloatOutput", [a_3, a_4], + [dtypes.float32]) + c_1 = _apply_op(g, "TwoFloatInputsFloatOutput", [a_1, b_1], + [dtypes.float32]) + d_1 = _apply_op(g, "TwoFloatInputsFloatOutput", [b_1, c_1], + [dtypes.float32]) + e_1 = _apply_op(g, "FloatOutput", [], [dtypes.float32]) with g.control_dependencies([a_2]): - b_2 = _apply_op(g, "mul", [a_3, a_4], [dtypes.float32]) - c_2 = _apply_op(g, "mul", [a_1, b_1], [dtypes.float32]) - d_2 = _apply_op(g, "mul", [b_2, c_2], [dtypes.float32]) - e_2 = _apply_op(g, "mul", [e_1, e_1], [dtypes.float32]) + b_2 = _apply_op(g, "TwoFloatInputsFloatOutput", [a_3, a_4], + [dtypes.float32]) + c_2 = _apply_op(g, "TwoFloatInputsFloatOutput", [a_1, b_1], + [dtypes.float32]) + d_2 = _apply_op(g, "TwoFloatInputsFloatOutput", [b_2, c_2], + [dtypes.float32]) + e_2 = _apply_op(g, "TwoFloatInputsFloatOutput", [e_1, e_1], + [dtypes.float32]) with g.control_dependencies([a_3]): - b_3 = _apply_op(g, "mul", [a_3, a_4], [dtypes.float32]) - c_3 = 
_apply_op(g, "mul", [a_1, b_1], [dtypes.float32]) - d_3 = _apply_op(g, "mul", [b_3, c_3], [dtypes.float32]) - e_3 = _apply_op(g, "mul", [e_2, e_2], [dtypes.float32]) + b_3 = _apply_op(g, "TwoFloatInputsFloatOutput", [a_3, a_4], + [dtypes.float32]) + c_3 = _apply_op(g, "TwoFloatInputsFloatOutput", [a_1, b_1], + [dtypes.float32]) + d_3 = _apply_op(g, "TwoFloatInputsFloatOutput", [b_3, c_3], + [dtypes.float32]) + e_3 = _apply_op(g, "TwoFloatInputsFloatOutput", [e_2, e_2], + [dtypes.float32]) with g.control_dependencies([a_4]): - b_4 = _apply_op(g, "mul", [a_3, a_4], [dtypes.float32]) - c_4 = _apply_op(g, "mul", [a_1, b_1], [dtypes.float32]) - d_4 = _apply_op(g, "mul", [b_4, c_4], [dtypes.float32]) - e_4 = _apply_op(g, "mul", [e_3, e_3], [dtypes.float32]) + b_4 = _apply_op(g, "TwoFloatInputsFloatOutput", [a_3, a_4], + [dtypes.float32]) + c_4 = _apply_op(g, "TwoFloatInputsFloatOutput", [a_1, b_1], + [dtypes.float32]) + d_4 = _apply_op(g, "TwoFloatInputsFloatOutput", [b_4, c_4], + [dtypes.float32]) + e_4 = _apply_op(g, "TwoFloatInputsFloatOutput", [e_3, e_3], + [dtypes.float32]) self.assertItemsEqual([a_1.op], b_1.op.control_inputs) self.assertItemsEqual([a_1.op, a_2.op], b_2.op.control_inputs) @@ -1322,25 +1343,26 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase): def testRepeatedDependency(self): g = ops.Graph() - a = g.create_op("foo", [], [dtypes.float32, dtypes.float32]) + a = g.create_op("TwoFloatOutputs", [], [dtypes.float32, dtypes.float32]) a_0, a_1 = a.outputs with g.control_dependencies([a_0]): - b = _apply_op(g, "const", [], [dtypes.float32]) + b = _apply_op(g, "FloatOutput", [], [dtypes.float32]) with g.control_dependencies([a_1]): - c = _apply_op(g, "const", [], [dtypes.float32]) + c = _apply_op(g, "FloatOutput", [], [dtypes.float32]) self.assertEqual(b.op.control_inputs, [a]) self.assertEqual(c.op.control_inputs, [a]) def testNoControlDependencyWithDataDependency(self): g = ops.Graph() - a = _apply_op(g, "const", [], [dtypes.float32]) + a = 
_apply_op(g, "FloatOutput", [], [dtypes.float32]) with g.control_dependencies([a]): - b = _apply_op(g, "identity", [a], [dtypes.float32]) + b = _apply_op(g, "Identity", [a], [dtypes.float32]) self.assertEqual(b.op.control_inputs, []) +@test_util.with_c_api class OpScopeTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() @@ -1353,8 +1375,8 @@ class OpScopeTest(test_util.TensorFlowTestCase): def testNoScopeName(self): g0 = ops.Graph() values = [ - g0.create_op("a", [], [dtypes.float32]), - g0.create_op("b", [], [dtypes.float32]) + g0.create_op("A", [], [dtypes.float32]), + g0.create_op("B", [], [dtypes.float32]) ] with self.assertRaises(ValueError): with ops.name_scope(None, values=values): @@ -1365,8 +1387,8 @@ class OpScopeTest(test_util.TensorFlowTestCase): def testEmptyScopeName(self): g0 = ops.Graph() - a = g0.create_op("a", [], [dtypes.float32]) - b = g0.create_op("b", [], [dtypes.float32]) + a = g0.create_op("A", [], [dtypes.float32]) + b = g0.create_op("B", [], [dtypes.float32]) with ops.name_scope("", values=[a, b]) as scope: self.assertEqual("", scope) self.assertEqual(g0, ops.get_default_graph()) @@ -1376,8 +1398,8 @@ class OpScopeTest(test_util.TensorFlowTestCase): def testDefaultScopeName(self): g0 = ops.Graph() - a = g0.create_op("a", [], [dtypes.float32]) - b = g0.create_op("b", [], [dtypes.float32]) + a = g0.create_op("A", [], [dtypes.float32]) + b = g0.create_op("B", [], [dtypes.float32]) scope_name = "my_scope" default_scope_name = "my_default_scope" with ops.name_scope(scope_name, default_scope_name, [a, b]) as scope: @@ -1393,36 +1415,37 @@ class OpScopeTest(test_util.TensorFlowTestCase): self.assertEqual("%s/" % scope_name, scope) self.assertEqual(graph_elements[0].graph, ops.get_default_graph()) g1 = ops.Graph() - c = g1.create_op("c", [], [dtypes.float32]) + a = g1.create_op("A", [], [dtypes.float32]) with self.assertRaises(ValueError): - with ops.name_scope(scope_name, values=graph_elements + [c]): + with 
ops.name_scope(scope_name, values=graph_elements + [a]): pass def testTensor(self): g0 = ops.Graph() - a = g0.create_op("a", [], [dtypes.float32]) - b = g0.create_op("b", [], [dtypes.float32]) + a = g0.create_op("A", [], [dtypes.float32]) + b = g0.create_op("B", [], [dtypes.float32]) self._testGraphElements([a, b]) def testSparseTensor(self): g0 = ops.Graph() - a = g0.create_op("a", [], [dtypes.float32]) - b = g0.create_op("b", [], [dtypes.float32]) + a = g0.create_op("A", [], [dtypes.float32]) + b = g0.create_op("B", [], [dtypes.float32]) sparse = sparse_tensor.SparseTensor( - _apply_op(g0, "const", [], [dtypes.int64]), - _apply_op(g0, "const", [], [dtypes.float32]), - _apply_op(g0, "const", [], [dtypes.int64])) + _apply_op(g0, "Int64Output", [], [dtypes.int64]), + _apply_op(g0, "FloatOutput", [], [dtypes.float32]), + _apply_op(g0, "Int64Output", [], [dtypes.int64])) self._testGraphElements([a, sparse, b]) def testVariable(self): g0 = ops.Graph() with g0.as_default(): variable = variables.Variable([1.0]) - a = g0.create_op("a", [], [dtypes.float32]) - b = g0.create_op("b", [], [dtypes.float32]) + a = g0.create_op("A", [], [dtypes.float32]) + b = g0.create_op("B", [], [dtypes.float32]) self._testGraphElements([a, variable, b]) +@test_util.with_c_api class GraphTest(test_util.TensorFlowTestCase): def setUp(self): @@ -1461,14 +1484,14 @@ class GraphTest(test_util.TensorFlowTestCase): class ConvertibleObj(object): def _as_graph_element(self): - return "const:0" + return "FloatOutput:0" class NonConvertibleObj(object): pass g = ops.Graph() - a = _apply_op(g, "const", [], [dtypes.float32]) + a = _apply_op(g, "FloatOutput", [], [dtypes.float32]) self.assertEqual(a, g.as_graph_element(ConvertibleObj())) with self.assertRaises(TypeError): g.as_graph_element(NonConvertibleObj()) @@ -1500,6 +1523,7 @@ class GraphTest(test_util.TensorFlowTestCase): self.assertIsNone(g_ref()) +@test_util.with_c_api class AttrScopeTest(test_util.TensorFlowTestCase): def _get_test_attrs(self): 
@@ -1551,8 +1575,10 @@ class AttrScopeTest(test_util.TensorFlowTestCase): ops.RegisterShape("KernelLabel")(common_shapes.scalar_shape) +@test_util.with_c_api class KernelLabelTest(test_util.TensorFlowTestCase): + @test_util.enable_c_api def testNoLabel(self): with self.test_session(): self.assertAllEqual(b"My label is: default", @@ -1594,7 +1620,8 @@ class AsGraphDefTest(test_util.TensorFlowTestCase): def testAddShapes(self): with ops.Graph().as_default() as g: - t1, t2, t3, t4, t5 = _apply_op(g, "an_op", [], [dtypes.float32] * 5) + t1, t2, t3, t4, t5 = _apply_op(g, "FiveFloatOutputs", [], + [dtypes.float32] * 5) t1.set_shape(None) t2.set_shape([]) t3.set_shape([None]) @@ -1603,7 +1630,7 @@ class AsGraphDefTest(test_util.TensorFlowTestCase): gd = g.as_graph_def(add_shapes=True) self.assertProtoEqualsVersion(""" - node { name: "an_op" op: "an_op" + node { name: "FiveFloatOutputs" op: "FiveFloatOutputs" attr { key: "_output_shapes" value { @@ -1625,6 +1652,7 @@ def _calc_a_forward_flops(unused_graph, unused_node): return ops.OpStats("flops", 20) +@test_util.with_c_api class StatisticsTest(test_util.TensorFlowTestCase): def testRegisteredNode(self): @@ -1649,6 +1677,7 @@ class StatisticsTest(test_util.TensorFlowTestCase): self.assertEqual(3, flops_total.value) +@test_util.with_c_api class ColocationGroupTest(test_util.TensorFlowTestCase): def testBasic(self): @@ -1773,9 +1802,13 @@ class ColocationGroupTest(test_util.TensorFlowTestCase): self.assertEqual("/device:CPU:0", b.device) +@test_util.with_c_api class DeprecatedTest(test_util.TensorFlowTestCase): def testSuccess(self): + # TODO(skyewm): make g.graph_def_versions work with the C API enabled + if ops._USE_C_API: return + with ops.Graph().as_default() as g: g.graph_def_versions.producer = 7 old = test_ops.old() @@ -1793,6 +1826,9 @@ class DeprecatedTest(test_util.TensorFlowTestCase): test_ops.old() def testGraphExecutionFail(self): + # TODO(skyewm): make g.graph_def_versions work with the C API enabled + if 
ops._USE_C_API: return + with ops.Graph().as_default() as g: g.graph_def_versions.producer = 7 old = test_ops.old() @@ -1802,11 +1838,12 @@ class DeprecatedTest(test_util.TensorFlowTestCase): old.run() +@test_util.with_c_api class DenseTensorLikeTypeTest(test_util.TensorFlowTestCase): def testSuccess(self): op = ops.Operation( - ops._NodeDef("noop", "myop"), ops.Graph(), [], [dtypes.float32]) + ops._NodeDef("None", "myop"), ops.Graph(), [], [dtypes.float32]) t = op.outputs[0] self.assertTrue(ops.is_dense_tensor_like(t)) @@ -1851,6 +1888,7 @@ class DenseTensorLikeTypeTest(test_util.TensorFlowTestCase): DenseTensorLikeTypeTest.BadClassBadDtype) +@test_util.with_c_api class NameScopeTest(test_util.TensorFlowTestCase): def testStripAndPrependScope(self): @@ -1889,6 +1927,7 @@ class NameScopeTest(test_util.TensorFlowTestCase): self.assertEqual("", g.get_name_scope()) +@test_util.with_c_api class TracebackTest(test_util.TensorFlowTestCase): def testTracebackWithStartLines(self): @@ -1910,6 +1949,7 @@ class TracebackTest(test_util.TensorFlowTestCase): self.assertEquals(frame, frame_with_start_line[:-1]) +@test_util.with_c_api class OutputTypesTest(test_util.TensorFlowTestCase): """Tests Operation._output_types property. @@ -1959,6 +1999,7 @@ class OutputTypesTest(test_util.TensorFlowTestCase): # pylint: enable=protected-access +@test_util.with_c_api class InputTypesTest(test_util.TensorFlowTestCase): """Tests Operation._input_dtypes and Operation._input_types properties. diff --git a/tensorflow/python/framework/test_ops.cc b/tensorflow/python/framework/test_ops.cc index d22b5b3e25..ead756a0a1 100644 --- a/tensorflow/python/framework/test_ops.cc +++ b/tensorflow/python/framework/test_ops.cc @@ -170,4 +170,165 @@ class ResourceUsingOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("ResourceUsingOp").Device(DEVICE_CPU), ResourceUsingOp); +// Various test ops without kernels. These are used to test graph construction. 
+ +REGISTER_OP("A") + .Output("out: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("B") + .Output("out: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("Foo1") + .Input("a: float32") + .Input("b: int32") + .Input("c: int32") + .Output("d: float32") + .Output("e: int32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("Foo2") + .Input("a: float32") + .Input("b: string") + .Input("c: string") + .Output("d: float32") + .Output("e: int32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("Foo3") + .Input("a: float32") + .Input("b: string") + .Input("c: float32") + .Output("d: float32") + .Output("e: int32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("CopyOp").Input("a: T").Output("b: T").Attr("T: type").SetShapeFn( + shape_inference::UnknownShape); + +REGISTER_OP("None").SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("IntOutput") + .Output("a: int32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("Int64Output") + .Output("out: int64") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("RefOutput") + .Output("a: Ref(int32)") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("FloatOutput") + .Output("a: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("TwoFloatOutputs") + .Output("a: float32") + .Output("b: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("FiveFloatOutputs") + .Output("a: float32") + .Output("b: float32") + .Output("c: float32") + .Output("d: float32") + .Output("e: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("RefOutputFloatOutput") + .Output("a: Ref(float32)") + .Output("b: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("RefInputFloatInput") + .Input("a: Ref(float)") + .Input("b: float") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("IntInput") + .Input("a: int32") + .SetShapeFn(shape_inference::UnknownShape); 
+ +REGISTER_OP("FloatInput") + .Input("a: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("TwoIntOutputs") + .Output("a: int32") + .Output("b: int32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("IntOutputFloatOutput") + .Output("a: int32") + .Output("b: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("FloatOutputStringOutput") + .Output("a: float32") + .Output("b: string") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("TwoIntInputs") + .Input("a: int32") + .Input("b: int32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("TwoFloatInputs") + .Input("a: float32") + .Input("b: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("IntInputFloatInput") + .Input("a: int32") + .Input("b: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("RefInputIntInput") + .Input("a: Ref(int32)") + .Input("b: int32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("TwoFloatInputsFloatOutput") + .Input("a: float32") + .Input("b: float32") + .Output("c: float32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("TwoFloatInputsIntOutput") + .Input("a: float32") + .Input("b: float32") + .Output("c: int32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("RefInputFloatInputIntOutput") + .Input("a: Ref(float32)") + .Input("b: float32") + .Output("c: int32") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("ListInput") + .Input("a: N * T") + .Attr("N: int >= 1") + .Attr("T: type") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("ListOutput") + .Output("a: T") + .Attr("T: list(type) >= 1") + .SetShapeFn(shape_inference::UnknownShape); + +REGISTER_OP("Unary").Input("a: T").Output("b: T").Attr("T: type").SetShapeFn( + shape_inference::UnknownShape); + +REGISTER_OP("OpWithDefaultAttr") + .Output("a: int32") + .Attr("default_float: float = 123.0") + .SetShapeFn(shape_inference::UnknownShape); + 
+REGISTER_OP("OpWithFutureDefaultAttr") + .SetShapeFn(shape_inference::UnknownShape); + } // end namespace tensorflow -- GitLab From 1ba562a6878905c9967e999a73e749b59de56e21 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 9 Oct 2017 12:48:22 -0700 Subject: [PATCH 188/909] Rewrote the clip_by_norm op to avoid generating infinite intermediate results when processing tensors of zeros. PiperOrigin-RevId: 171573629 --- tensorflow/python/BUILD | 15 ++++++++ tensorflow/python/ops/clip_ops.py | 8 ++--- tensorflow/python/ops/clip_ops_test.py | 50 ++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 tensorflow/python/ops/clip_ops_test.py diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index bdbad14660..1099611f37 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1485,6 +1485,21 @@ py_library( ], ) +py_test( + name = "clip_ops_test", + size = "small", + srcs = ["ops/clip_ops_test.py"], + srcs_version = "PY2AND3", + tags = ["no_windows"], + deps = [ + ":client_testlib", + ":clip_ops", + ":framework_for_generated_wrappers", + ":numerics", + "//third_party/py/numpy", + ], +) + py_library( name = "control_flow_grad", srcs = ["ops/control_flow_grad.py"], diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py index 7430c28583..80803530c1 100644 --- a/tensorflow/python/ops/clip_ops.py +++ b/tensorflow/python/ops/clip_ops.py @@ -107,15 +107,13 @@ def clip_by_norm(t, clip_norm, axes=None, name=None): t = ops.convert_to_tensor(t, name="t") # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm - l2norm_inv = math_ops.rsqrt( - math_ops.reduce_sum(t * t, axes, keep_dims=True)) + l2norm = math_ops.sqrt(math_ops.reduce_sum(t * t, axes, keep_dims=True)) intermediate = t * clip_norm # Assert that the shape is compatible with the initial shape, # to prevent unintentional broadcasting. 
_ = t.shape.merge_with(intermediate.shape) - tclip = array_ops.identity(intermediate * math_ops.minimum( - l2norm_inv, constant_op.constant(1.0, dtype=t.dtype) / clip_norm), - name=name) + tclip = array_ops.identity( + intermediate / math_ops.maximum(l2norm, clip_norm), name=name) return tclip diff --git a/tensorflow/python/ops/clip_ops_test.py b/tensorflow/python/ops/clip_ops_test.py new file mode 100644 index 0000000000..7d8dc90491 --- /dev/null +++ b/tensorflow/python/ops/clip_ops_test.py @@ -0,0 +1,50 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Clip Operations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import numerics +from tensorflow.python.platform import test + + +class ClipOpsTest(test.TestCase): + + def __init__(self, method_name="runTest"): + super(ClipOpsTest, self).__init__(method_name) + + def _testClipByNorm(self, inputs, max_norm, expected): + with self.test_session() as sess: + input_op = constant_op.constant(inputs) + clipped = clip_ops.clip_by_norm(input_op, max_norm) + check_op = numerics.add_check_numerics_ops() + result, _ = sess.run([clipped, check_op]) + self.assertAllClose(result, expected) + + def testClipByNorm(self): + # Simple example + self._testClipByNorm([[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]], 4.0, + [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]]) + # Zero norm + self._testClipByNorm([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], 4.0, + [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]) + + +if __name__ == "__main__": + test.main() -- GitLab From 27df639673ae2bfe63b82862008da9bec488f0db Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Mon, 9 Oct 2017 13:00:39 -0700 Subject: [PATCH 189/909] [Grappler] Correctly replace control-dependency uses. When redirecting the use of node A to node B, old code incorrectly replace control dependencies with data dependencies. 
PiperOrigin-RevId: 171575072 --- .../optimizers/arithmetic_optimizer.cc | 14 ++++++++--- .../optimizers/arithmetic_optimizer_test.cc | 25 +++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 343820de71..5c9073f049 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -555,12 +555,18 @@ void ArithmeticOptimizer::SimplifyArithmeticOps( for (NodeDef* consumer : consumers) { // Update `consumer`'s use of `node` to `input`'s operand. for (int i = 0; i < consumer->input_size(); ++i) { - if (NodeName(consumer->input(i)) == node->name()) { - *consumer->mutable_input(i) = simplified_tensor; + int operand_pos; + string operand_node_name = + ParseNodeName(consumer->input(i), &operand_pos); + if (operand_node_name == node->name()) { + *consumer->mutable_input(i) = + (operand_pos < 0 + ? 
AsControlDependency(NodeName(simplified_tensor)) + : simplified_tensor); } + VLOG(2) << "Update input " << consumer->input(i) << " of " + << consumer->name() << " to " << simplified_tensor; } - VLOG(2) << "Update input " << node->name() << " of " << consumer->name() - << " to " << simplified_tensor; node_map.UpdateInput(consumer->name(), node->name(), simplified_tensor); if (!nodes_to_simplify.Exists(consumer)) { nodes_to_simplify.PushBack(consumer); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index b3405646eb..7965419ea2 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -240,6 +240,31 @@ TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposesMultipleOutputs) { } } +TEST_F(ArithmeticOptimizerTest, RemoveTransposesWithControlDependency) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output inputs = + ops::Placeholder(s, DT_FLOAT, ops::Placeholder::Shape({2, 3})); + Output transpose1 = ops::Transpose(s, inputs, ops::Const(s, {1, 0})); + Output transpose2 = ops::Transpose(s, transpose1, ops::Const(s, {1, 0})); + Output outputs = + ops::Identity(s.WithOpName("outputs").WithControlDependencies(transpose2), + ops::Const(s.WithOpName("outputs_const"), 1.0f)); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + GraphDef output; + TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); + item.graph = output; + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + + NodeMap node_map(&output); + const NodeDef* outputs_node = node_map.GetNode("outputs"); + EXPECT_EQ(2, outputs_node->input_size()); + EXPECT_EQ(outputs_node->input(0), "outputs_const"); + EXPECT_EQ(outputs_node->input(1), "^Placeholder"); +} + TEST_F(ArithmeticOptimizerTest, NotRemoveTransposes) { tensorflow::Scope s = 
tensorflow::Scope::NewRootScope(); Output inputs_shape = -- GitLab From 11c123b43bd26d7829a927f2150622be84d57ef2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 13:19:07 -0700 Subject: [PATCH 190/909] [TF:XLA] Rename HLO visitor methods from LogicalX to X PiperOrigin-RevId: 171577639 --- tensorflow/compiler/tests/randomized_tests.cc | 12 ++++----- .../compiler/xla/client/lib/arithmetic.cc | 10 +++---- .../compiler/xla/client/lib/arithmetic.h | 4 +-- .../compiler/xla/service/dfs_hlo_visitor.h | 17 ++++++------ .../compiler/xla/service/hlo_evaluator.cc | 27 +++++++++---------- .../compiler/xla/service/hlo_instruction.cc | 6 ++--- tensorflow/compiler/xla/tests/reduce_test.cc | 18 ++++++------- 7 files changed, 45 insertions(+), 49 deletions(-) diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc index 7e307f16af..fef12d9397 100644 --- a/tensorflow/compiler/tests/randomized_tests.cc +++ b/tensorflow/compiler/tests/randomized_tests.cc @@ -1791,28 +1791,28 @@ TEST_F(OpTest, Log1p) { }); } -TEST_F(OpTest, LogicalAnd) { +TEST_F(OpTest, BooleanAnd) { Repeatedly([this]() { auto dims = BroadcastableDims(); return ExpectTfAndXlaOutputsAreClose( - OpTestBuilder("LogicalAnd") + OpTestBuilder("BooleanAnd") .RandomInput(DT_BOOL, dims.first) .RandomInput(DT_BOOL, dims.second)); }); } -TEST_F(OpTest, LogicalNot) { +TEST_F(OpTest, BooleanNot) { Repeatedly([this]() { return ExpectTfAndXlaOutputsAreClose( - OpTestBuilder("LogicalNot").RandomInput(DT_BOOL)); + OpTestBuilder("BooleanNot").RandomInput(DT_BOOL)); }); } -TEST_F(OpTest, LogicalOr) { +TEST_F(OpTest, BooleanOr) { Repeatedly([this]() { auto dims = BroadcastableDims(); return ExpectTfAndXlaOutputsAreClose( - OpTestBuilder("LogicalOr") + OpTestBuilder("BooleanOr") .RandomInput(DT_BOOL, dims.first) .RandomInput(DT_BOOL, dims.second)); }); diff --git a/tensorflow/compiler/xla/client/lib/arithmetic.cc 
b/tensorflow/compiler/xla/client/lib/arithmetic.cc index 99e9f2dbb2..24048a1e5a 100644 --- a/tensorflow/compiler/xla/client/lib/arithmetic.cc +++ b/tensorflow/compiler/xla/client/lib/arithmetic.cc @@ -89,16 +89,16 @@ Computation CreateScalarMinComputation(PrimitiveType type, const ComputationDataHandle& rhs) { return b->Min(lhs, rhs); }); } -Computation CreateScalarLogicalAndComputation(ComputationBuilder* builder) { +Computation CreateScalarAndComputation(ComputationBuilder* builder) { return CreateScalarComputation( - "logical_and", PRED, builder, + "and", PRED, builder, [](ComputationBuilder* b, const ComputationDataHandle& lhs, const ComputationDataHandle& rhs) { return b->And(lhs, rhs); }); } -Computation CreateScalarLogicalOrComputation(ComputationBuilder* builder) { +Computation CreateScalarOrComputation(ComputationBuilder* builder) { return CreateScalarComputation( - "logical_or", PRED, builder, + "or", PRED, builder, [](ComputationBuilder* b, const ComputationDataHandle& lhs, const ComputationDataHandle& rhs) { return b->Or(lhs, rhs); }); } @@ -106,7 +106,7 @@ Computation CreateScalarLogicalOrComputation(ComputationBuilder* builder) { StatusOr Any(const ComputationDataHandle& predicates, ComputationBuilder* builder) { auto f = builder->ConstantR0(false); - Computation logical_or = CreateScalarLogicalOrComputation(builder); + Computation logical_or = CreateScalarOrComputation(builder); TF_ASSIGN_OR_RETURN(std::unique_ptr predicates_shape, builder->GetShape(predicates)); std::vector all_dimensions(ShapeUtil::Rank(*predicates_shape)); diff --git a/tensorflow/compiler/xla/client/lib/arithmetic.h b/tensorflow/compiler/xla/client/lib/arithmetic.h index f43d35fe4a..ae89784bc2 100644 --- a/tensorflow/compiler/xla/client/lib/arithmetic.h +++ b/tensorflow/compiler/xla/client/lib/arithmetic.h @@ -45,10 +45,10 @@ Computation CreateScalarMinComputation(PrimitiveType type, ComputationBuilder* builder); // Creates a scalar logical AND computation and returns it. 
-Computation CreateScalarLogicalAndComputation(ComputationBuilder* builder); +Computation CreateScalarAndComputation(ComputationBuilder* builder); // Creates a scalar logical OR computation and returns it. -Computation CreateScalarLogicalOrComputation(ComputationBuilder* builder); +Computation CreateScalarOrComputation(ComputationBuilder* builder); // Returns whether any predicate in "predicates" is set. // diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index 2c16a1b903..8c864f3d07 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -156,17 +156,16 @@ class DfsHloVisitor { HloInstruction* operand) { return HandleElementwiseUnary(is_finite); } - virtual Status HandleLogicalAnd(HloInstruction* logical_and, - HloInstruction* lhs, HloInstruction* rhs) { - return HandleElementwiseBinary(logical_and); + virtual Status HandleAnd(HloInstruction* and_, HloInstruction* lhs, + HloInstruction* rhs) { + return HandleElementwiseBinary(and_); } - virtual Status HandleLogicalNot(HloInstruction* logical_not, - HloInstruction* operand) { - return HandleElementwiseUnary(logical_not); + virtual Status HandleNot(HloInstruction* not_, HloInstruction* operand) { + return HandleElementwiseUnary(not_); } - virtual Status HandleLogicalOr(HloInstruction* logical_or, - HloInstruction* lhs, HloInstruction* rhs) { - return HandleElementwiseBinary(logical_or); + virtual Status HandleOr(HloInstruction* or_, HloInstruction* lhs, + HloInstruction* rhs) { + return HandleElementwiseBinary(or_); } virtual Status HandleReducePrecision(HloInstruction* reduce_precision) { return HandleElementwiseUnary(reduce_precision); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 61c59987f5..53e33c9fd0 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ 
b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -255,12 +255,11 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); }; - Status HandleLogicalNot(HloInstruction* logical_not, - HloInstruction* operand) override { - TF_ASSIGN_OR_RETURN( - parent_->evaluated_[logical_not], - ElementWiseUnaryOp(logical_not, - [](ReturnT elem_operand) { return !elem_operand; })); + Status HandleNot(HloInstruction* not_, HloInstruction* operand) override { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[not_], + ElementWiseUnaryOp(not_, [](ReturnT elem_operand) { + return !elem_operand; + })); return Status::OK(); }; @@ -368,21 +367,21 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); }; - Status HandleLogicalAnd(HloInstruction* logical_and, HloInstruction* lhs, - HloInstruction* rhs) override { + Status HandleAnd(HloInstruction* and_, HloInstruction* lhs, + HloInstruction* rhs) override { TF_ASSIGN_OR_RETURN( - parent_->evaluated_[logical_and], - ElementWiseBinaryOp(logical_and, [](ReturnT lhs_el, ReturnT rhs_el) { + parent_->evaluated_[and_], + ElementWiseBinaryOp(and_, [](ReturnT lhs_el, ReturnT rhs_el) { return lhs_el && rhs_el; })); return Status::OK(); }; - Status HandleLogicalOr(HloInstruction* logical_or, HloInstruction* lhs, - HloInstruction* rhs) override { + Status HandleOr(HloInstruction* or_, HloInstruction* lhs, + HloInstruction* rhs) override { TF_ASSIGN_OR_RETURN( - parent_->evaluated_[logical_or], - ElementWiseBinaryOp(logical_or, [](ReturnT lhs_el, ReturnT rhs_el) { + parent_->evaluated_[or_], + ElementWiseBinaryOp(or_, [](ReturnT lhs_el, ReturnT rhs_el) { return lhs_el || rhs_el; })); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 77a748163e..81bccfddbb 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1958,9 +1958,9 
@@ Status HloInstruction::Visit(DfsHloVisitor* visitor) { case HloOpcode::kMinimum: return visitor->HandleMinimum(this); case HloOpcode::kAnd: - return visitor->HandleLogicalAnd(this, operands_[0], operands_[1]); + return visitor->HandleAnd(this, operands_[0], operands_[1]); case HloOpcode::kOr: - return visitor->HandleLogicalOr(this, operands_[0], operands_[1]); + return visitor->HandleOr(this, operands_[0], operands_[1]); case HloOpcode::kConcatenate: return visitor->HandleConcatenate(this, operands_); case HloOpcode::kConvert: @@ -2017,7 +2017,7 @@ Status HloInstruction::Visit(DfsHloVisitor* visitor) { case HloOpcode::kIsFinite: return visitor->HandleIsFinite(this, operands_[0]); case HloOpcode::kNot: - return visitor->HandleLogicalNot(this, operands_[0]); + return visitor->HandleNot(this, operands_[0]); case HloOpcode::kBitcast: return visitor->HandleBitcast(this); case HloOpcode::kBroadcast: diff --git a/tensorflow/compiler/xla/tests/reduce_test.cc b/tensorflow/compiler/xla/tests/reduce_test.cc index 2271f32c59..b48b3a2bdb 100644 --- a/tensorflow/compiler/xla/tests/reduce_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_test.cc @@ -120,10 +120,10 @@ class ReduceTest : public ClientLibraryTestBase { Computation reduce; if (and_reduce) { init_value = builder.ConstantR0(true); - reduce = CreateScalarLogicalAndComputation(&builder); + reduce = CreateScalarAndComputation(&builder); } else { init_value = builder.ConstantR0(false); - reduce = CreateScalarLogicalOrComputation(&builder); + reduce = CreateScalarOrComputation(&builder); } builder.Reduce(pred_values, init_value, reduce, /*dimensions_to_reduce=*/{0}); @@ -729,16 +729,14 @@ XLA_TEST_F(ReduceTest, VectorizedReduce_Min) { std::numeric_limits::max()); } -XLA_TEST_F(ReduceTest, VectorizedReduce_LogicalAnd) { - RunVectorizedReduceTestForType(CreateScalarLogicalAndComputation, - [](bool a, bool b) { return a && b; }, - true); +XLA_TEST_F(ReduceTest, VectorizedReduce_BooleanAnd) { + 
RunVectorizedReduceTestForType( + CreateScalarAndComputation, [](bool a, bool b) { return a && b; }, true); } -XLA_TEST_F(ReduceTest, VectorizedReduce_LogicalOr) { - RunVectorizedReduceTestForType(CreateScalarLogicalOrComputation, - [](bool a, bool b) { return a || b; }, - false); +XLA_TEST_F(ReduceTest, VectorizedReduce_BooleanOr) { + RunVectorizedReduceTestForType( + CreateScalarOrComputation, [](bool a, bool b) { return a || b; }, false); } class ReduceR3ToR2Test : public ReduceTest, -- GitLab From 0ac688a18cc56816d8c767f7fcbce97b05b2319e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 13:21:22 -0700 Subject: [PATCH 191/909] Adding a binary classification example PiperOrigin-RevId: 171577979 --- tensorflow/contrib/boosted_trees/README.md | 11 ++ .../boosted_trees/examples/binary_mnist.py | 169 ++++++++++++++++++ .../contrib/boosted_trees/examples/boston.py | 2 - 3 files changed, 180 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/boosted_trees/README.md create mode 100644 tensorflow/contrib/boosted_trees/examples/binary_mnist.py diff --git a/tensorflow/contrib/boosted_trees/README.md b/tensorflow/contrib/boosted_trees/README.md new file mode 100644 index 0000000000..9ce700f1a1 --- /dev/null +++ b/tensorflow/contrib/boosted_trees/README.md @@ -0,0 +1,11 @@ +# TF Boosted Trees (TFBT) + +TF Boosted trees is an implementation of a gradient boosting algorithm with +trees used as weak learners. + +## Examples +Folder "examples" demonstrates how TFBT estimators can be used for various +problems. Namely, it contains: +* binary_mnist.py - an example on how to use TFBT for binary classification. +* mnist.py - a multiclass example. +* boston.py - a regression example.
\ No newline at end of file diff --git a/tensorflow/contrib/boosted_trees/examples/binary_mnist.py b/tensorflow/contrib/boosted_trees/examples/binary_mnist.py new file mode 100644 index 0000000000..9be362f5c8 --- /dev/null +++ b/tensorflow/contrib/boosted_trees/examples/binary_mnist.py @@ -0,0 +1,169 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Demonstrates binary MNIST TF Boosted trees example. + + This example demonstrates how to run experiments with TF Boosted Trees on + a binary dataset. We use digits 4 and 9 from the original MNIST dataset. + + Example Usage: + python tensorflow/contrib/boosted_trees/examples/binary_mnist.py \ + --output_dir="/tmp/binary_mnist" --depth=4 --learning_rate=0.3 \ + --batch_size=10761 --examples_per_layer=10761 --eval_batch_size=1030 \ + --num_eval_steps=1 --num_trees=10 --l2=1 --vmodule=training_ops=1 \ + + When training is done, accuracy on eval data is reported.
Point tensorboard + to the directory for the run to see how the training progresses: + + tensorboard --logdir=/tmp/binary_mnist + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys + +import numpy as np +import tensorflow as tf +from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier +from tensorflow.contrib.boosted_trees.proto import learner_pb2 +from tensorflow.contrib.learn import learn_runner + + +def get_input_fn(data, + batch_size, + capacity=10000, + min_after_dequeue=3000): + """Input function over MNIST data.""" + # Keep only 4 and 9 digits. + ids = np.where((data.labels == 4) | (data.labels == 9)) + images = data.images[ids] + labels = data.labels[ids] + # Make digit 4 label 1, 9 is 0. + labels = labels == 4 + + def _input_fn(): + """Prepare features and labels.""" + images_batch, labels_batch = tf.train.shuffle_batch( + tensors=[images, + labels.astype(np.int32)], + batch_size=batch_size, + capacity=capacity, + min_after_dequeue=min_after_dequeue, + enqueue_many=True, + num_threads=4) + features_map = {"images": images_batch} + return features_map, labels_batch + + return _input_fn + + +# Main config - creates a TF Boosted Trees Estimator based on flags. +def _get_tfbt(output_dir): + """Configures TF Boosted Trees estimator based on flags.""" + learner_config = learner_pb2.LearnerConfig() + + learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate + learner_config.regularization.l1 = 0.0 + learner_config.regularization.l2 = FLAGS.l2 / FLAGS.examples_per_layer + learner_config.constraints.max_tree_depth = FLAGS.depth + + growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER + learner_config.growing_mode = growing_mode + run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300) + + # Create a TF Boosted trees estimator that can take in custom loss.
+ estimator = GradientBoostedDecisionTreeClassifier( + learner_config=learner_config, + examples_per_layer=FLAGS.examples_per_layer, + model_dir=output_dir, + num_trees=FLAGS.num_trees, + center_bias=False, + config=run_config) + return estimator + + +def _make_experiment_fn(output_dir): + """Creates experiment for gradient boosted decision trees.""" + data = tf.contrib.learn.datasets.mnist.load_mnist() + train_input_fn = get_input_fn(data.train, FLAGS.batch_size) + eval_input_fn = get_input_fn(data.validation, FLAGS.eval_batch_size) + + return tf.contrib.learn.Experiment( + estimator=_get_tfbt(output_dir), + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + train_steps=None, + eval_steps=FLAGS.num_eval_steps, + eval_metrics=None) + + +def main(unused_argv): + learn_runner.run( + experiment_fn=_make_experiment_fn, + output_dir=FLAGS.output_dir, + schedule="train_and_evaluate") + + +if __name__ == "__main__": + tf.logging.set_verbosity(tf.logging.INFO) + parser = argparse.ArgumentParser() + # Define the list of flags that users can change. + parser.add_argument( + "--output_dir", + type=str, + required=True, + help="Choose the dir for the output.") + parser.add_argument( + "--batch_size", + type=int, + default=1000, + help="The batch size for reading data.") + parser.add_argument( + "--eval_batch_size", + type=int, + default=1000, + help="Size of the batch for eval.") + parser.add_argument( + "--num_eval_steps", + type=int, + default=1, + help="The number of steps to run evaluation for.") + # Flags for gradient boosted trees config. + parser.add_argument( + "--depth", type=int, default=4, help="Maximum depth of weak learners.") + parser.add_argument( + "--l2", type=float, default=1.0, help="l2 regularization per batch.") + parser.add_argument( + "--learning_rate", + type=float, + default=0.1, + help="Learning rate (shrinkage weight) with which each new tree is added." 
+ ) + parser.add_argument( + "--examples_per_layer", + type=int, + default=1000, + help="Number of examples to accumulate stats for per layer.") + parser.add_argument( + "--num_trees", + type=int, + default=None, + required=True, + help="Number of trees to grow before stopping.") + + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/boosted_trees/examples/boston.py b/tensorflow/contrib/boosted_trees/examples/boston.py index 0cb9e956ef..2c0a3c4912 100644 --- a/tensorflow/contrib/boosted_trees/examples/boston.py +++ b/tensorflow/contrib/boosted_trees/examples/boston.py @@ -44,8 +44,6 @@ from tensorflow.contrib.boosted_trees.proto import learner_pb2 from tensorflow.contrib.layers.python.layers import feature_column from tensorflow.contrib.learn import learn_runner -_TEST_SPLIT_RATIO = 0.2 -_TEST_SPLIT_SEED = 42 _BOSTON_NUM_FEATURES = 13 -- GitLab From 7e4e336ce5b874fadf8024b6a9c90e1bc8ed2867 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Mon, 9 Oct 2017 13:31:15 -0700 Subject: [PATCH 192/909] Relanding change to add config to enable S3 file system support. Pass --config=s3 argument to Bazel to build with S3 file system support. Change was originally rolled back due to a failure it caused in //tensorflow/core/kernels:control_flow_ops_test on Macs which is now fixed. 
PiperOrigin-RevId: 171579378 --- configure.py | 2 ++ tensorflow/BUILD | 6 ++++++ tensorflow/core/platform/default/build_config.bzl | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/configure.py b/configure.py index 9ca614f8f9..9da49b628d 100644 --- a/configure.py +++ b/configure.py @@ -991,6 +991,8 @@ def main(): 'with_gcp_support', False, 'gcp') set_build_var(environ_cp, 'TF_NEED_HDFS', 'Hadoop File System', 'with_hdfs_support', False, 'hdfs') + set_build_var(environ_cp, 'TF_NEED_S3', 'Amazon S3 File System', + 'with_s3_support', True, 's3') set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', False, 'xla') set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 56d0939023..1620bb5f2a 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -185,6 +185,12 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "with_s3_support", + values = {"define": "with_s3_support=true"}, + visibility = ["//visibility:public"], +) + config_setting( name = "with_xla_support", values = {"define": "with_xla_support=true"}, diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 51d37291ee..2c14ea917c 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -465,6 +465,11 @@ def tf_additional_core_deps(): "//tensorflow/core/platform/hadoop:hadoop_file_system", ], "//conditions:default": [], + }) + select({ + "//tensorflow:with_s3_support": [ + "//tensorflow/contrib/s3:s3_file_system", + ], + "//conditions:default": [], }) # TODO(jart, jhseu): Delete when GCP is default on. -- GitLab From 7c74d2f68a9d4737c85606c41435555189d3dc44 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 9 Oct 2017 13:44:11 -0700 Subject: [PATCH 193/909] Expose tfe.test, tfe.in_eager_mode, tfe.in_graph_mode All are useful for library writers. 
PiperOrigin-RevId: 171581311 --- tensorflow/contrib/eager/python/tfe.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py index 249aaebea2..fbdc576739 100644 --- a/tensorflow/contrib/eager/python/tfe.py +++ b/tensorflow/contrib/eager/python/tfe.py @@ -47,6 +47,9 @@ To use, at program startup, call `tfe.enable_eager_execution()`. @@SummaryWriter @@restore_variables_on_create @@Variable + +@@in_eager_mode +@@in_graph_mode """ from __future__ import absolute_import @@ -65,6 +68,8 @@ from tensorflow.python.eager import backprop from tensorflow.python.eager.custom_gradient import custom_gradient from tensorflow.python.eager import function from tensorflow.python.eager.context import enable_eager_execution +from tensorflow.python.eager.context import in_eager_mode +from tensorflow.python.eager.context import in_graph_mode from tensorflow.python.eager.context import list_devices from tensorflow.python.eager.context import num_gpus from tensorflow.python.eager.context import run -- GitLab From be69f13a074013a9c0322822e83b6320ef6c52bc Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 9 Oct 2017 14:21:44 -0700 Subject: [PATCH 194/909] [TF:XLA] Fix broken build of xla_interpreter_device. 
PiperOrigin-RevId: 171586211 --- tensorflow/compiler/jit/xla_interpreter_device.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/jit/xla_interpreter_device.cc b/tensorflow/compiler/jit/xla_interpreter_device.cc index 4e4cbe200a..2614deefd8 100644 --- a/tensorflow/compiler/jit/xla_interpreter_device.cc +++ b/tensorflow/compiler/jit/xla_interpreter_device.cc @@ -42,9 +42,9 @@ Status XlaInterpreterDeviceFactory::CreateDevices( (void)registrations; std::unique_ptr device; - TF_RETURN_IF_ERROR(XlaDevice::Create("Interpreter", DEVICE_XLA_INTERPRETER, 0, - DEVICE_INTERPRETER_XLA_JIT, options, - name_prefix, &device)); + TF_RETURN_IF_ERROR(XlaDevice::Create( + "Interpreter", DEVICE_XLA_INTERPRETER, 0, DEVICE_INTERPRETER_XLA_JIT, + options, name_prefix, /*register_device_for_compilation=*/true, &device)); devices->push_back(device.release()); return Status::OK(); } -- GitLab From 33d55122d994d12f2a066f9ec4f0f03094a59579 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Mon, 9 Oct 2017 15:18:44 -0700 Subject: [PATCH 195/909] [Grappler] Fixed two bugs in ArithmeticOptimizer. 1. The data type of Mul should be stored in key "T" instead of "dtype". 2. Add consumer_of_mul to new_nodes because it is modified. This caused Grappler to miss some optimizations. 
PiperOrigin-RevId: 171594972 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../optimizers/arithmetic_optimizer.cc | 3 +- .../optimizers/arithmetic_optimizer_test.cc | 53 +++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index c4def6cf23..06a62f2a00 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -178,6 +178,7 @@ tf_cc_test( srcs = ["arithmetic_optimizer_test.cc"], deps = [ ":arithmetic_optimizer", + ":constant_folding", ":model_pruner", "//tensorflow/cc:cc_ops", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 5c9073f049..3ec62b5a00 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -465,7 +465,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( scaled_weights->set_name(weights->name() + "_scaled"); scaled_weights->set_op("Mul"); scaled_weights->set_device(weights->device()); - (*scaled_weights->mutable_attr())["dtype"] = + (*scaled_weights->mutable_attr())["T"] = weights->attr().at("dtype"); node_map->AddNode(scaled_weights->name(), scaled_weights); new_nodes->push_back(scaled_weights); @@ -490,6 +490,7 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( consumer_of_mul->set_input(0, mul->input(0)); node_map->UpdateInput(consumer_of_mul->name(), mul->name(), other->name()); + new_nodes->push_back(consumer_of_mul); return conv->name(); } } diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 7965419ea2..234c096073 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ 
-18,6 +18,7 @@ limitations under the License. #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" +#include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/optimizers/model_pruner.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -397,6 +398,58 @@ TEST_F(ArithmeticOptimizerTest, FoldMulToConv) { CHECK_EQ(node_map.GetNode(NodeName(folded_conv->input(1)))->op(), "Mul"); } +TEST_F(ArithmeticOptimizerTest, OptimizeCastMulTransposeConv) { + // This unit test exercises two optimizations, folding mul into conv, and + // reordering cast and transpose. + // + // Conv2D(Transpose(Mul(Cast(I), S)), W) + // => + // Conv2D(Transpose(Cast(I)), W*S) + // => + // Conv2D(Cast(Transpose(I)), W*S) + tensorflow::Scope s = tensorflow::Scope::NewRootScope().WithDevice("/gpu:0"); + Output inputs = + ops::Placeholder(s, DT_UINT8, ops::Placeholder::Shape({8, 28, 28, 3})); + Output cast = ops::Cast(s, inputs, DT_FLOAT); + Output mul = ops::Mul(s, cast, ops::Const(s, 1.0f / 255.0f)); + Output transpose = + ops::Transpose(s, mul, ops::Const(s.WithOpName("perm"), {0, 3, 1, 2})); + Output weights = ops::Const(s.WithOpName("weights"), + Input::Initializer(127.0f, {5, 5, 3, 16})); + Output conv = ops::Conv2D(s, transpose, weights, {1, 1, 1, 1}, "VALID", + ops::Conv2D::DataFormat("NCHW")); + Output outputs = ops::Identity(s.WithOpName("outputs"), conv); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); + + item.graph = output; + TF_EXPECT_OK( + ConstantFolding(/*cpu_device=*/nullptr).Optimize(nullptr, item, &output)); + + item.graph = output; + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + + NodeMap node_map(&output); + 
const NodeDef* inputs_node = CHECK_NOTNULL(node_map.GetNode("Placeholder")); + const NodeDef* transpose_node = + CHECK_NOTNULL(node_map.GetNode("Transpose_uint8")); + const NodeDef* cast_node = CHECK_NOTNULL(node_map.GetNode("Cast_new")); + const NodeDef* weights_node = + CHECK_NOTNULL(node_map.GetNode("weights_scaled")); + const NodeDef* conv_node = CHECK_NOTNULL(node_map.GetNode("Conv2D")); + + EXPECT_EQ(output.node_size(), 7); + EXPECT_EQ(transpose_node->input(0), inputs_node->name()); + EXPECT_EQ(cast_node->input(0), transpose_node->name()); + EXPECT_EQ(conv_node->input(0), cast_node->name()); + EXPECT_EQ(conv_node->input(1), weights_node->name()); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 88145023cea47b4a96cc04f8febe205d50a0d0d6 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 9 Oct 2017 16:24:05 -0700 Subject: [PATCH 196/909] Removing side outputs from tape code. They belong better in future function objects (simplifies tape move to C) PiperOrigin-RevId: 171603665 --- tensorflow/python/eager/backprop.py | 2 +- tensorflow/python/eager/custom_gradient.py | 1 - tensorflow/python/eager/function.py | 8 +++++--- tensorflow/python/eager/imperative_grad.py | 3 +-- tensorflow/python/eager/tape.py | 19 +++---------------- tensorflow/python/framework/ops.py | 2 +- 6 files changed, 11 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index cca8e47044..554b9a818c 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -281,7 +281,7 @@ def _record_gradient(op_name, inputs, attrs, results, name): "output_grads", orig_outputs, "gradients", result) return nest.flatten(result) - tape.record_operation(op_name, results, inputs, [], grad_fn) + tape.record_operation(op_name, results, inputs, grad_fn) if _tracing: print("Computed op", (name if name else op_name), "inputs", inputs, "outputs", results) diff --git 
a/tensorflow/python/eager/custom_gradient.py b/tensorflow/python/eager/custom_gradient.py index 4360e53225..87348e87b1 100644 --- a/tensorflow/python/eager/custom_gradient.py +++ b/tensorflow/python/eager/custom_gradient.py @@ -85,7 +85,6 @@ def custom_gradient(f): f.__name__, flat_result, input_tensors, - [], actual_grad_fn) flat_result = list(flat_result) return result diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 8a1936b3fe..da49517cf9 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -109,7 +109,7 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False): tensor_map[ops.tensor_id(value)] = (value, captured_value) else: captured_value = captured_value[1] - tape.record_operation("captured_value", [captured_value], [value], [], + tape.record_operation("captured_value", [captured_value], [value], lambda x: [x]) return captured_value @@ -288,12 +288,14 @@ class _GraphModeFunction(object): real_outputs = outputs[:len(self._returns)] side_outputs = outputs[len(self._returns):] + def backward_function(*args): + return self._backward_function(*(list(args) + side_outputs)) + tape.record_operation( signature.name, real_outputs, (args + self._extra_inputs), - side_outputs, - self._backward_function) + backward_function) return self._build_call_outputs(self._returns, real_outputs) diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py index b81f5bba14..ab6eb87a07 100644 --- a/tensorflow/python/eager/imperative_grad.py +++ b/tensorflow/python/eager/imperative_grad.py @@ -182,8 +182,7 @@ def imperative_grad( else: out_gradients[i] = vspace.aggregate_fn(out_gradients[i]) - in_gradients = op_trace.backward_function( - *(out_gradients + op_trace.side_outputs)) + in_gradients = op_trace.backward_function(*(out_gradients)) for i, t in enumerate(op_trace.input_ids): if in_gradients[i] is not None: 
vspace.add_new_grads_fn(gradients, gradients_size, t, in_gradients[i]) diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py index 84814d48fd..4578a7190d 100644 --- a/tensorflow/python/eager/tape.py +++ b/tensorflow/python/eager/tape.py @@ -32,7 +32,7 @@ def tid(tensor): class TapeEntry( collections.namedtuple("TapeEntry", [ "op_type", - "output_ids", "input_ids", "side_outputs", "backward_function", + "output_ids", "input_ids", "backward_function", "output_shape_and_dtype", ])): """Entry in the gradient tape. @@ -43,8 +43,6 @@ class TapeEntry( Args: output_ids: tensor_id(t) for each output tensor T input_ids: tensor_id(t) for each input tensor T - side_outputs: optional tensors (not IDs) which need to be provided to the - backward function. backward_function: function to be called with the downstream gradients and side outputs as arguments which computes the backward pass. output_shape_and_dtype: a list of (shape_tuple, dtype) for every output @@ -69,8 +67,6 @@ class Tape(object): self._op_tape = {} # next operation ID self._next_op_id = 0 - # List of directly watched tensors - self._watched = [] # Set of directly watched variables self._watched_variables = set() @@ -91,14 +87,13 @@ class Tape(object): if i not in self._tensor_tape: self._tensor_tape[i] = None self._tensor_usage[i] = 1 - self._watched.append(tensor) def watch_variable(self, v): self._watched_variables.add(v) self.watch(v.handle) def record_operation(self, op_type, output_tensors, input_tensors, - side_outputs, backward_function): + backward_function): """Records an operation in the tape.""" if not self.should_record(input_tensors): return output_tensors @@ -113,7 +108,6 @@ class Tape(object): op_type, [tid(t) for t in output_tensors], [tid(t) for t in input_tensors], - side_outputs, backward_function, [(_tensor_shape(t), t.dtype) for t in output_tensors]) self._next_op_id += 1 @@ -227,13 +221,11 @@ def should_record(tensors): return any(x.should_record(tensors) for x in 
_tape_stack.stack) -def record_operation(op_type, output_tensors, input_tensors, side_outputs, - backward_function): +def record_operation(op_type, output_tensors, input_tensors, backward_function): """Records the operation on all tapes in the stack.""" for t in _tape_stack.stack: t.record_operation(op_type, output_tensors, input_tensors, - side_outputs, backward_function) @@ -243,11 +235,6 @@ def delete_trace(tensor_id): t.delete_trace(tensor_id) -def top_tape_watched_tensors(): - t = _tape_stack.stack[-1] - return t._watched # pylint: disable=protected-access - - def top_tape_watched_variables(): t = _tape_stack.stack[-1] return t._watched_variables # pylint: disable=protected-access diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 669588ace0..7f5f60e599 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -679,7 +679,7 @@ class _EagerTensorBase(Tensor): self_device = self.device def grad_fun(dresult): return [dresult._copy(device_name=self_device)] - tape.record_operation("_copy", [new_tensor], [self], [], grad_fun) + tape.record_operation("_copy", [new_tensor], [self], grad_fun) return new_tensor # pylint: enable=protected-access -- GitLab From f49f6cd1758b9ecc92eedd377983e8047b05d964 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 16:39:21 -0700 Subject: [PATCH 197/909] Replace CHECK() with a WARNING in StepStatsCollector so that Save after Finalize won't crash. 
PiperOrigin-RevId: 171605724 --- tensorflow/core/common_runtime/step_stats_collector.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc index e7f58f9ecf..e6403df97f 100644 --- a/tensorflow/core/common_runtime/step_stats_collector.cc +++ b/tensorflow/core/common_runtime/step_stats_collector.cc @@ -253,7 +253,9 @@ void StepStatsCollector::Save(const string& device, VLOG(1) << "Save dev " << device << " nt " << stats->stats(); { mutex_lock l(mu_); - CHECK(!finalized_); + if (finalized_) { + LOG(WARNING) << "stats saved after finalize will not be collected."; + } if (!step_stats_ || collectedNodes >= kMaxCollectedNodes) { VLOG(1) << "step_stats_ nullptr or already collected too many nodes."; delete stats; -- GitLab From 0cbd8c74a3c4833733d7e69ff31c3e7ba50cc413 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 17:01:17 -0700 Subject: [PATCH 198/909] New CUDA kernel for LSTMBlockCell's forward propagation. 
PiperOrigin-RevId: 171608367 --- tensorflow/contrib/rnn/kernels/lstm_ops.cc | 82 ++++++- tensorflow/contrib/rnn/kernels/lstm_ops.h | 82 ------- .../contrib/rnn/kernels/lstm_ops_gpu.cu.cc | 202 +++++++++++++++++- 3 files changed, 279 insertions(+), 87 deletions(-) diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.cc b/tensorflow/contrib/rnn/kernels/lstm_ops.cc index ffeb9953c5..2b56c6f95a 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops.cc +++ b/tensorflow/contrib/rnn/kernels/lstm_ops.cc @@ -41,6 +41,86 @@ typedef Eigen::GpuDevice GPUDevice; namespace functor { +template +void LSTMBlockCellFpropWithEigen( + const LSTMBlockCell& cell, OpKernelContext* ctx, const CPUDevice& d, + const T forget_bias, const T cell_clip, bool use_peephole, + typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix cs_prev, + typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, + typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, + typename TTypes::ConstVec wco, typename TTypes::ConstVec b, + typename TTypes::Matrix xh, typename TTypes::Matrix i, + typename TTypes::Matrix cs, typename TTypes::Matrix f, + typename TTypes::Matrix o, typename TTypes::Matrix ci, + typename TTypes::Matrix co, typename TTypes::Matrix icfo, + typename TTypes::Matrix h) { + // Concat xh = [x, h]. + xh.slice(cell.xh_x_offsets(), cell.xh_x_extents()).device(d) = x; + xh.slice(cell.xh_h_offsets(), cell.xh_h_extents()).device(d) = h_prev; + + // states1 = xh * w + b + typename TTypes::ConstMatrix const_xh(xh.data(), xh.dimensions()); + TensorBlasGemm::compute( + ctx, d, false, false, T(1), const_xh, w, T(0), icfo); + Eigen::array b_shape({1, b.dimensions()[0]}); + Eigen::array broadcast_shape({cell.batch_size(), 1}); + icfo.device(d) += b.reshape(b_shape).broadcast(broadcast_shape); + + Eigen::array p_shape({1, cell.cell_size()}); + Eigen::array p_broadcast_shape({cell.batch_size(), 1}); + + // Input gate. 
+ if (use_peephole) { + auto i_peep = cs_prev * wci.reshape(p_shape).broadcast(p_broadcast_shape); + i.device(d) = + (icfo.slice(cell.icfo_i_offsets(), cell.cell_extents()) + i_peep) + .sigmoid(); + } else { + i.device(d) = + icfo.slice(cell.icfo_i_offsets(), cell.cell_extents()).sigmoid(); + } + + // Cell input. + ci.device(d) = icfo.slice(cell.icfo_c_offsets(), cell.cell_extents()).tanh(); + + // Forget gate (w/ bias). + if (use_peephole) { + auto f_peep = cs_prev * wcf.reshape(p_shape).broadcast(p_broadcast_shape); + f.device(d) = (icfo.slice(cell.icfo_f_offsets(), cell.cell_extents()) + + f.constant(forget_bias) + f_peep) + .sigmoid(); + } else { + f.device(d) = (icfo.slice(cell.icfo_f_offsets(), cell.cell_extents()) + + f.constant(forget_bias)) + .sigmoid(); + } + + // cs = ci .* i + f .* cs_prev + cs.device(d) = i * ci + f * cs_prev; + + if (cell_clip > 0.0f) { + cs.device(d) = + cs.binaryExpr(cs.constant(cell_clip), Eigen::scalar_clip_op()); + } + + // co = tanh(cs) + co.device(d) = cs.tanh(); + + // Output gate. 
+ if (use_peephole) { + auto o_peep = cs * wco.reshape(p_shape).broadcast(p_broadcast_shape); + o.device(d) = + (icfo.slice(cell.icfo_o_offsets(), cell.cell_extents()) + o_peep) + .sigmoid(); + } else { + o.device(d) = + icfo.slice(cell.icfo_o_offsets(), cell.cell_extents()).sigmoid(); + } + + // h = o .* co + h.device(d) = o * co; +} + #define DEFINE_CPU_SPECS(T) \ template <> \ void LSTMBlockCellFprop::operator()( \ @@ -55,7 +135,7 @@ namespace functor { typename TTypes::Matrix f, typename TTypes::Matrix o, \ typename TTypes::Matrix ci, typename TTypes::Matrix co, \ typename TTypes::Matrix icfo, typename TTypes::Matrix h) { \ - LSTMBlockCellFpropWithEigen( \ + LSTMBlockCellFpropWithEigen( \ *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev, \ h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h); \ } \ diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.h b/tensorflow/contrib/rnn/kernels/lstm_ops.h index 30a4b44706..53641ff47e 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops.h +++ b/tensorflow/contrib/rnn/kernels/lstm_ops.h @@ -169,88 +169,6 @@ struct LSTMBlockCellFprop : public LSTMBlockCell { typename TTypes::Matrix h); }; -// TODO(b/63339763): Once GPUDevice implementation no longer relies on Eigen, -// move into lstm_ops.cc. 
-template -void LSTMBlockCellFpropWithEigen( - const LSTMBlockCell& cell, OpKernelContext* ctx, const Device& d, - const T forget_bias, const T cell_clip, bool use_peephole, - typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix cs_prev, - typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, - typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, - typename TTypes::ConstVec wco, typename TTypes::ConstVec b, - typename TTypes::Matrix xh, typename TTypes::Matrix i, - typename TTypes::Matrix cs, typename TTypes::Matrix f, - typename TTypes::Matrix o, typename TTypes::Matrix ci, - typename TTypes::Matrix co, typename TTypes::Matrix icfo, - typename TTypes::Matrix h) { - // Concat xh = [x, h]. - xh.slice(cell.xh_x_offsets(), cell.xh_x_extents()).device(d) = x; - xh.slice(cell.xh_h_offsets(), cell.xh_h_extents()).device(d) = h_prev; - - // states1 = xh * w + b - typename TTypes::ConstMatrix const_xh(xh.data(), xh.dimensions()); - TensorBlasGemm::compute(ctx, d, false, false, T(1), - const_xh, w, T(0), icfo); - Eigen::array b_shape({1, b.dimensions()[0]}); - Eigen::array broadcast_shape({cell.batch_size(), 1}); - icfo.device(d) += b.reshape(b_shape).broadcast(broadcast_shape); - - Eigen::array p_shape({1, cell.cell_size()}); - Eigen::array p_broadcast_shape({cell.batch_size(), 1}); - - // Input gate. - if (use_peephole) { - auto i_peep = cs_prev * wci.reshape(p_shape).broadcast(p_broadcast_shape); - i.device(d) = - (icfo.slice(cell.icfo_i_offsets(), cell.cell_extents()) + i_peep) - .sigmoid(); - } else { - i.device(d) = - icfo.slice(cell.icfo_i_offsets(), cell.cell_extents()).sigmoid(); - } - - // Cell input. - ci.device(d) = icfo.slice(cell.icfo_c_offsets(), cell.cell_extents()).tanh(); - - // Forget gate (w/ bias). 
- if (use_peephole) { - auto f_peep = cs_prev * wcf.reshape(p_shape).broadcast(p_broadcast_shape); - f.device(d) = (icfo.slice(cell.icfo_f_offsets(), cell.cell_extents()) + - f.constant(forget_bias) + f_peep) - .sigmoid(); - } else { - f.device(d) = (icfo.slice(cell.icfo_f_offsets(), cell.cell_extents()) + - f.constant(forget_bias)) - .sigmoid(); - } - - // cs = ci .* i + f .* cs_prev - cs.device(d) = i * ci + f * cs_prev; - - if (cell_clip > 0.0f) { - cs.device(d) = - cs.binaryExpr(cs.constant(cell_clip), Eigen::scalar_clip_op()); - } - - // co = tanh(cs) - co.device(d) = cs.tanh(); - - // Output gate. - if (use_peephole) { - auto o_peep = cs * wco.reshape(p_shape).broadcast(p_broadcast_shape); - o.device(d) = - (icfo.slice(cell.icfo_o_offsets(), cell.cell_extents()) + o_peep) - .sigmoid(); - } else { - o.device(d) = - icfo.slice(cell.icfo_o_offsets(), cell.cell_extents()).sigmoid(); - } - - // h = o .* co - h.device(d) = o * co; -} - // See lstm_ops.cc for CPUDevice implementation and lstm_ops_gpu.cu.cc for // GPUDevice implementation. template diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc index e18f8079a3..90990fe452 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc +++ b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc @@ -20,15 +20,208 @@ limitations under the License. #include "tensorflow/contrib/rnn/kernels/lstm_ops.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/eigen_activations.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" namespace tensorflow { namespace functor { typedef Eigen::GpuDevice GPUDevice; +namespace { + +// Adds bias, applies non-linearities and gates. +// +// Launch with a 2D setup such that there is one thread per (example, +// activation) with 'x' governing example index and 'y' governing activation. 
+// +// Launch with blocks of (batch x 32) +// +// TODO(b/67600500): Try making 'use_peephole' a template parameter. +template +__global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev, + const T* wci, const T* wcf, const T* wco, T* o, T* h, + T* ci, T* cs, T* co, T* i, T* f, const T forget_bias, + const T cell_clip, const bool use_peephole, + const int batch_size, const int cell_size) { + const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; + const int act_id = blockIdx.y * blockDim.y + threadIdx.y; + + if (batch_id >= batch_size || act_id >= cell_size) return; + + // The following code assumes the input arrays are of the following + // shapes and interpretations. + // + // 1) 'icfo' is a matrix such that, + // + // cell_size cell_size cell_size cell_size + // +----------+----------+----------+----------+ + // | | | | | + // | i | c | f | o | batch_size + // | | | | | + // +----------+----------+----------+----------+ + // + // 'gid' is the index assigned to this thread for 'icfo' in the 'i' submatrix. + // + // 2) 'b' is a vector such that, + // + // cell_size cell_size cell_size cell_size + // +----------+----------+----------+----------+ + // | i | c | f | o | 1 + // +----------+----------+----------+----------+ + // + // 'act_id' is the index assigned to this thread for 'b' in the 'i' subvector. + // + // 3) 'wc{i,f,o}' are vectors such that, + // + // cell_size + // +----------+ + // | i | 1 + // +----------+ + // + // 'act_id' is the index to this thread. + // + // 4) All other matrices have the form, + // + // cell_size + // +----------+ + // | | + // | i | batch_size + // | | + // +----------+ + // + // 'cid' is the index assigned to this thread. 
+ // + const int gid = batch_id * cell_size * 4 + act_id; + const int cid = batch_id * cell_size + act_id; + Eigen::internal::scalar_sigmoid_op sigmoid_op; + Eigen::internal::scalar_tanh_op tanh_op; + Eigen::scalar_clip_op clip_op; + + T i_local; + if (use_peephole) { + i_local = sigmoid_op(icfo[0 * cell_size + gid] + b[0 * cell_size + act_id] + + cs_prev[cid] * wci[act_id]); + } else { + i_local = sigmoid_op(icfo[0 * cell_size + gid] + b[0 * cell_size + act_id]); + } + i[cid] = i_local; + + T ci_local = tanh_op(icfo[1 * cell_size + gid] + b[1 * cell_size + act_id]); + ci[cid] = ci_local; + + T f_local; + if (use_peephole) { + f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] + + forget_bias + cs_prev[cid] * wcf[act_id]); + } else { + f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] + + forget_bias); + } + f[cid] = f_local; + + T cs_local = i_local * ci_local + f_local * cs_prev[cid]; + if (cell_clip > 0.0) { + cs_local = clip_op(cs_local, cell_clip); + } + cs[cid] = cs_local; + + T co_local = tanh_op(cs_local); + co[cid] = co_local; + + T o_local; + if (use_peephole) { + o_local = sigmoid_op(icfo[3 * cell_size + gid] + b[3 * cell_size + act_id] + + cs_local * wco[act_id]); + } else { + o_local = sigmoid_op(icfo[3 * cell_size + gid] + b[3 * cell_size + act_id]); + } + o[cid] = o_local; + + h[cid] = o_local * co_local; +} + +// Concatenate 'x' and 'h' and copy their contents into 'xh'. 
+template +__global__ void concat_xh(T* xh, const T* x, const T* h_prev, + const int batch_size, const int cell_size, + const int input_size) { + // Assumes 'x', 'h', and 'xh' are of the following shape, + // + // input_size cell_size + // +----------+----------+ + // | | | + // | x | h | batch_size + // | | | + // +----------+----------+ + // + const int gid = blockDim.x * blockIdx.x + threadIdx.x; + const int width = input_size + cell_size; + + if (gid >= width * batch_size) return; + + const int output_row = gid / width; + const int output_col = gid % width; + + if (output_col < input_size) { // x + xh[gid] = x[output_row * input_size + output_col]; + } else { // h + xh[gid] = h_prev[output_row * cell_size + output_col - input_size]; + } +} + +template +void LSTMBlockCellFpropWithCUDA( + OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, + const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, + typename TTypes::ConstMatrix cs_prev, + typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, + typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, + typename TTypes::ConstVec wco, typename TTypes::ConstVec b, + typename TTypes::Matrix xh, typename TTypes::Matrix i, + typename TTypes::Matrix cs, typename TTypes::Matrix f, + typename TTypes::Matrix o, typename TTypes::Matrix ci, + typename TTypes::Matrix co, typename TTypes::Matrix icfo, + typename TTypes::Matrix h, int batch_size, int cell_size, + int input_size) { + const cudaStream_t& cu_stream = GetCudaStream(ctx); + + // Concatenate xh = [x, h]. + // + // Each block is assigned 128 threads. Good values are in [128, 1024] and are + // divisible by 32 (the size of a warp). The number of blocks is such that + // there are enough to process all the data. 
+ const int block_dim = 128; + const int grid_dim = + Eigen::divup(batch_size * (cell_size + input_size), block_dim); + concat_xh<<>>( + xh.data(), x.data(), h_prev.data(), batch_size, cell_size, input_size); + + // states1 = xh * w + typename TTypes::ConstMatrix const_xh(xh.data(), xh.dimensions()); + TensorBlasGemm::compute( + ctx, d, false, false, T(1), const_xh, w, T(0), icfo); + + // Add bias, apply non-linearities and gating. + // + // Use 2D blocks. The number of threads per block is equal to x * y, where x = + // min(batch_size, 8) and y = 32. See above for guidance on number of + // threads. + dim3 block_dim_2d(min(batch_size, 8), 32); + dim3 grid_dim_2d(Eigen::divup(batch_size, static_cast(block_dim_2d.x)), + Eigen::divup(cell_size, static_cast(block_dim_2d.y))); + + lstm_gates<<>>( + icfo.data(), b.data(), cs_prev.data(), wci.data(), wcf.data(), wco.data(), + o.data(), h.data(), ci.data(), cs.data(), co.data(), i.data(), f.data(), + forget_bias, cell_clip, use_peephole, batch_size, cell_size); +} + +} // namespace + // TODO(b/63339763): Provide an alternative implementation for -// LSTMBlockCell{F,B}prop that doesn't rely on Eigen. +// LSTMBlockCellBprop that doesn't rely on Eigen. 
#define DEFINE_GPU_SPECS(T) \ template struct TensorZero; \ template struct TensorUnalignedZero; \ @@ -49,9 +242,10 @@ typedef Eigen::GpuDevice GPUDevice; typename TTypes::Matrix f, typename TTypes::Matrix o, \ typename TTypes::Matrix ci, typename TTypes::Matrix co, \ typename TTypes::Matrix icfo, typename TTypes::Matrix h) { \ - LSTMBlockCellFpropWithEigen( \ - *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev, \ - h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h); \ + LSTMBlockCellFpropWithCUDA(ctx, d, forget_bias, cell_clip, use_peephole, \ + x, cs_prev, h_prev, w, wci, wcf, wco, b, xh, i, \ + cs, f, o, ci, co, icfo, h, batch_size_, \ + cell_size_, input_size_); \ } \ template <> \ void LSTMBlockCellBprop::operator()( \ -- GitLab From 319d823a09e8c3f1c0850b9d146f7e4d7e5bd310 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 9 Oct 2017 17:01:25 -0700 Subject: [PATCH 199/909] TFE: Fix reference counts when copying to Numpy arrays. PiperOrigin-RevId: 171608395 --- tensorflow/python/eager/pywrap_tensor.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 18337bdd45..157e87d387 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -326,6 +326,9 @@ void EagerTensor_dealloc(EagerTensor* self) { Py_DECREF(self->keras_mask); TFE_DeleteTensorHandle(self->handle); self->handle = nullptr; + // We have the global interpreter lock, so use this chance to perform delayed + // refcount decrements. + tensorflow::ClearDecrefCache(); PyObject* id = PyLong_FromLongLong(self->id); PyObject* func = PyObject_GetAttrString(reinterpret_cast(self), "_delete_trace"); -- GitLab From 3a52d39b41486d2c7d19a47e5a246b6a446aa76c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 17:01:17 -0700 Subject: [PATCH 200/909] New CUDA kernel for LSTMBlockCell's forward propagation. 
PiperOrigin-RevId: 171608367 --- tensorflow/python/eager/pywrap_tensor.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 157e87d387..18337bdd45 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -326,9 +326,6 @@ void EagerTensor_dealloc(EagerTensor* self) { Py_DECREF(self->keras_mask); TFE_DeleteTensorHandle(self->handle); self->handle = nullptr; - // We have the global interpreter lock, so use this chance to perform delayed - // refcount decrements. - tensorflow::ClearDecrefCache(); PyObject* id = PyLong_FromLongLong(self->id); PyObject* func = PyObject_GetAttrString(reinterpret_cast(self), "_delete_trace"); -- GitLab From fdb2b12d1ad84392df09dc5dcd457ca7e96cb423 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 9 Oct 2017 17:01:25 -0700 Subject: [PATCH 201/909] TFE: Fix reference counts when copying to Numpy arrays. PiperOrigin-RevId: 171608395 --- tensorflow/python/eager/pywrap_tensor.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 18337bdd45..157e87d387 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -326,6 +326,9 @@ void EagerTensor_dealloc(EagerTensor* self) { Py_DECREF(self->keras_mask); TFE_DeleteTensorHandle(self->handle); self->handle = nullptr; + // We have the global interpreter lock, so use this chance to perform delayed + // refcount decrements. + tensorflow::ClearDecrefCache(); PyObject* id = PyLong_FromLongLong(self->id); PyObject* func = PyObject_GetAttrString(reinterpret_cast(self), "_delete_trace"); -- GitLab From 8ff5070392bd0066930d11e3e39d21d3fa84bb2e Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Mon, 9 Oct 2017 17:05:20 -0700 Subject: [PATCH 202/909] [Grappler] Optimize bitcasts. Two optimizations: 1. 
If dst_type == type(x), Bitcast(x, dst_type) => No-op 2. Bitcast(Bitcast(x, type1), type2) => Bitcast(x, type2) PiperOrigin-RevId: 171608976 --- .../optimizers/arithmetic_optimizer.cc | 68 ++++++++++++++++++- .../optimizers/arithmetic_optimizer_test.cc | 61 +++++++++++++++++ 2 files changed, 127 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 3ec62b5a00..971163eadf 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -289,6 +289,44 @@ static DataType GetDataTypeFromAttr(const NodeDef& node, return attr.type(); } +static void SetDataTypeToAttr(DataType dtype, const string& attr_name, + NodeDef* node) { + (*node->mutable_attr())[attr_name].set_type(dtype); +} + +static string SourceDataTypeAttrName(const NodeDef& node) { + if (node.op() == "Bitcast") { + return "T"; + } else if (node.op() == "Cast") { + return "SrcT"; + } else { + LOG(FATAL) << "SourceDataTypeAttrName not implemented for op " << node.op(); + } +} + +static string DestinationDataTypeAttrName(const NodeDef& node) { + if (node.op() == "Bitcast") { + return "type"; + } else if (node.op() == "Cast") { + return "DstT"; + } else { + LOG(FATAL) << "DestinationDataTypeAttrName not implemented for op " + << node.op(); + } +} + +static DataType GetSourceDataType(const NodeDef& node) { + return GetDataTypeFromAttr(node, SourceDataTypeAttrName(node)); +} + +static DataType GetDestinationDataType(const NodeDef& node) { + return GetDataTypeFromAttr(node, DestinationDataTypeAttrName(node)); +} + +static void SetSourceDataType(DataType dtype, NodeDef* node) { + SetDataTypeToAttr(dtype, SourceDataTypeAttrName(*node), node); +} + static bool IsNumberType(DataType dtype) { DataTypeVector number_types = NumberTypes(); return std::find(number_types.begin(), number_types.end(), dtype) != @@ -369,8 +407,8 @@ string 
ArithmeticOptimizer::TrySimplifyAndReplaceUses( const NodeDef* cast = node_map->GetNode(transpose->input(0)); if (cast->op() == "Cast") { const NodeDef* input = node_map->GetNode(cast->input(0)); - const DataType src_type = GetDataTypeFromAttr(*cast, "SrcT"); - const DataType dst_type = GetDataTypeFromAttr(*cast, "DstT"); + const DataType src_type = GetSourceDataType(*cast); + const DataType dst_type = GetDestinationDataType(*cast); if (IsNumberType(src_type) && IsNumberType(dst_type) && DataTypeSize(src_type) < DataTypeSize(dst_type)) { NodeDef* new_transpose = graph_def->add_node(); @@ -401,6 +439,32 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } } + if (node->op() == "Bitcast") { + NodeDef* bitcast = node_map->GetNode(node->name()); + // Bypass bitcasts whose source type and destination type are equal. + if (GetSourceDataType(*bitcast) == GetDestinationDataType(*bitcast)) { + return bitcast->input(0); + } + + const NodeDef* operand = node_map->GetNode(bitcast->input(0)); + if (operand->op() == bitcast->op()) { + // Bitcast(Bitcast(x, type1), type2) => Bitcast(x, type2) + bitcast->set_input(0, operand->input(0)); + SetSourceDataType(GetSourceDataType(*operand), bitcast); + node_map->UpdateInput(bitcast->name(), bitcast->input(0), + operand->input(0)); + new_nodes->push_back(bitcast); + return bitcast->name(); + } + } + + if (node->op() == "Cast") { + // Bypass casts whose source type and destination type are equal. + if (GetSourceDataType(*node) == GetDestinationDataType(*node)) { + return node->input(0); + } + } + // Fold a multiply of a scalar into the following convolution. This folding // can jump across nodes that merely reorders data (such as reshape and // transpose). 
For example, we can optimize diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 234c096073..39b4999808 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -450,6 +450,67 @@ TEST_F(ArithmeticOptimizerTest, OptimizeCastMulTransposeConv) { EXPECT_EQ(conv_node->input(1), weights_node->name()); } +TEST_F(ArithmeticOptimizerTest, CombineBitcasts) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output inputs = + ops::Placeholder(s, DT_UINT8, ops::Placeholder::Shape({2, 3})); + Output bc1 = ops::Bitcast(s, inputs, DT_QINT8); + Output bc2 = ops::Bitcast(s, bc1, DT_INT8); + Output outputs = ops::Identity(s.WithOpName("outputs"), bc2); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); + item.graph = output; + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + + EXPECT_EQ(1, std::count_if( + output.node().begin(), output.node().end(), + [](const NodeDef& node) { return node.op() == "Bitcast"; })); +} + +TEST_F(ArithmeticOptimizerTest, CombineAndRemoveBitcasts) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output inputs = ops::Placeholder(s, DT_INT8, ops::Placeholder::Shape({2, 3})); + Output bc1 = ops::Bitcast(s, inputs, DT_QINT8); + Output bc2 = ops::Bitcast(s, bc1, DT_INT8); + Output outputs = ops::Identity(s.WithOpName("outputs"), bc2); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + GraphDef output; + TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); + item.graph = output; + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + + EXPECT_EQ(0, std::count_if( + output.node().begin(), output.node().end(), + [](const NodeDef& 
node) { return node.op() == "Bitcast"; })); +} + +TEST_F(ArithmeticOptimizerTest, RemoveRedundantCast) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output inputs = ops::Placeholder(s, DT_INT8, ops::Placeholder::Shape({2, 3})); + Output cast = ops::Cast(s, inputs, DT_INT8); + Output outputs = ops::Identity(s.WithOpName("outputs"), cast); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + GraphDef output; + TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); + item.graph = output; + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + + EXPECT_EQ(0, std::count_if( + output.node().begin(), output.node().end(), + [](const NodeDef& node) { return node.op() == "Cast"; })); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 319a359fba508d5012dd4d9f6362c349c7c88367 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Mon, 9 Oct 2017 17:21:55 -0700 Subject: [PATCH 203/909] Create a cuda9 cudnn 7 docker file, simpler, using ARGS. PiperOrigin-RevId: 171610904 --- .../docker/Dockerfile.devel-gpu-cuda9-cudnn7 | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 new file mode 100644 index 0000000000..ac1a437031 --- /dev/null +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu-cuda9-cudnn7 @@ -0,0 +1,107 @@ +FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 + +MAINTAINER Gunhan Gulsoy + +# It is possible to override these for releases. 
+ARG TF_BRANCH=master +ARG BAZEL_VERSION=0.5.4 +ARG TF_AVAILABLE_CPUS=32 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + curl \ + git \ + golang \ + libcurl3-dev \ + libfreetype6-dev \ + libpng12-dev \ + libzmq3-dev \ + pkg-config \ + python-dev \ + python-pip \ + rsync \ + software-properties-common \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + wget \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN pip --no-cache-dir install --upgrade \ + pip setuptools + +RUN pip --no-cache-dir install \ + ipykernel \ + jupyter \ + matplotlib \ + numpy \ + scipy \ + sklearn \ + pandas \ + && \ + python -m ipykernel.kernelspec + +# Set up our notebook config. +COPY jupyter_notebook_config.py /root/.jupyter/ + +# Jupyter has issues with being run directly: +# https://github.com/ipython/ipython/issues/7062 +# We just add a little wrapper script. +COPY run_jupyter.sh / + +# Set up Bazel. + +# Running bazel inside a `docker build` command causes trouble, cf: +# https://github.com/bazelbuild/bazel/issues/134 +# The easiest solution is to set up a bazelrc file forcing --batch. +RUN echo "startup --batch" >>/etc/bazel.bazelrc +# Similarly, we need to workaround sandboxing issues: +# https://github.com/bazelbuild/bazel/issues/418 +RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \ + >>/etc/bazel.bazelrc +WORKDIR / +RUN mkdir /bazel && \ + cd /bazel && \ + wget --quiet https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \ + wget --quiet https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE && \ + chmod +x bazel-*.sh && \ + ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \ + rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh + +# Download and build TensorFlow. 
+WORKDIR / +RUN git clone https://github.com/tensorflow/tensorflow.git && \ + cd tensorflow && \ + git checkout ${TF_BRANCH} +WORKDIR /tensorflow + +# Configure the build for our CUDA configuration. +ENV CI_BUILD_PYTHON python +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV TF_NEED_CUDA 1 +ENV TF_CUDA_COMPUTE_CAPABILITIES 3.0,3.5,5.2,6.0,6.1 +ENV TF_CUDA_VERSION 9.0 +ENV TF_CUDNN_VERSION 7.0 +RUN ./configure + +RUN LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \ + bazel build -c opt --config=cuda --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ + --jobs=${TF_AVAILABLE_CPUS} \ + tensorflow/tools/pip_package:build_pip_package && \ + mkdir -p /pip_pkg && \ + bazel-bin/tensorflow/tools/pip_package/build_pip_package /pip_pkg + +RUN pip --no-cache-dir install --upgrade /pip_pkg/tensorflow-*.whl && \ + +WORKDIR /root + +# TensorBoard +EXPOSE 6006 +# IPython +EXPOSE 8888 + +RUN ["/bin/bash"] -- GitLab From 52d3a842463d11990600bb65f9752b59f6d8f418 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 17:22:20 -0700 Subject: [PATCH 204/909] Fix wasserstein gradient penalty name scope issue and add the proper name scope. PiperOrigin-RevId: 171610946 --- .../gan/python/losses/python/losses_impl.py | 83 ++++++++++--------- .../python/losses/python/losses_impl_test.py | 23 ++++- 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl.py b/tensorflow/contrib/gan/python/losses/python/losses_impl.py index b4a74fc49c..940762cf2a 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl.py @@ -297,7 +297,6 @@ def acgan_generator_loss( # GANs` (https://arxiv.org/abs/1704.00028). -# TODO(joelshor): Figure out why this function can't be inside a name scope. 
def wasserstein_gradient_penalty( real_data, generated_data, @@ -339,48 +338,50 @@ def wasserstein_gradient_penalty( Raises: ValueError: If the rank of data Tensors is unknown. """ - real_data = ops.convert_to_tensor(real_data) - generated_data = ops.convert_to_tensor(generated_data) - if real_data.shape.ndims is None: - raise ValueError('`real_data` can\'t have unknown rank.') - if generated_data.shape.ndims is None: - raise ValueError('`generated_data` can\'t have unknown rank.') - - differences = generated_data - real_data - batch_size = differences.shape[0].value or array_ops.shape(differences)[0] - alpha_shape = [batch_size] + [1] * (differences.shape.ndims - 1) - alpha = random_ops.random_uniform(shape=alpha_shape) - interpolates = real_data + (alpha * differences) - - # Reuse variables if a discriminator scope already exists. - reuse = False if discriminator_scope is None else True - with variable_scope.variable_scope(discriminator_scope, 'gpenalty_dscope', - reuse=reuse): - disc_interpolates = discriminator_fn(interpolates, generator_inputs) - - if isinstance(disc_interpolates, tuple): - # ACGAN case: disc outputs more than one tensor - disc_interpolates = disc_interpolates[0] - - gradients = gradients_impl.gradients(disc_interpolates, interpolates)[0] - gradient_squares = math_ops.reduce_sum( - math_ops.square(gradients), axis=list(range(1, gradients.shape.ndims))) - # Propagate shape information, if possible. - if isinstance(batch_size, int): - gradient_squares.set_shape([ - batch_size] + gradient_squares.shape.as_list()[1:]) - # For numerical stability, add epsilon to the sum before taking the square - # root. Note tf.norm does not add epsilon. 
- slopes = math_ops.sqrt(gradient_squares + epsilon) - penalties = math_ops.square(slopes - 1.0) - penalty = losses.compute_weighted_loss( - penalties, weights, scope=scope, loss_collection=loss_collection, - reduction=reduction) + with ops.name_scope(scope, 'wasserstein_gradient_penalty', + (real_data, generated_data)) as scope: + real_data = ops.convert_to_tensor(real_data) + generated_data = ops.convert_to_tensor(generated_data) + if real_data.shape.ndims is None: + raise ValueError('`real_data` can\'t have unknown rank.') + if generated_data.shape.ndims is None: + raise ValueError('`generated_data` can\'t have unknown rank.') + + differences = generated_data - real_data + batch_size = differences.shape[0].value or array_ops.shape(differences)[0] + alpha_shape = [batch_size] + [1] * (differences.shape.ndims - 1) + alpha = random_ops.random_uniform(shape=alpha_shape) + interpolates = real_data + (alpha * differences) + + with ops.name_scope(None): # Clear scope so update ops are added properly. + # Reuse variables if variables already exists. + with variable_scope.variable_scope(discriminator_scope, 'gpenalty_dscope', + reuse=variable_scope.AUTO_REUSE): + disc_interpolates = discriminator_fn(interpolates, generator_inputs) + + if isinstance(disc_interpolates, tuple): + # ACGAN case: disc outputs more than one tensor + disc_interpolates = disc_interpolates[0] + + gradients = gradients_impl.gradients(disc_interpolates, interpolates)[0] + gradient_squares = math_ops.reduce_sum( + math_ops.square(gradients), axis=list(range(1, gradients.shape.ndims))) + # Propagate shape information, if possible. + if isinstance(batch_size, int): + gradient_squares.set_shape([ + batch_size] + gradient_squares.shape.as_list()[1:]) + # For numerical stability, add epsilon to the sum before taking the square + # root. Note tf.norm does not add epsilon. 
+ slopes = math_ops.sqrt(gradient_squares + epsilon) + penalties = math_ops.square(slopes - 1.0) + penalty = losses.compute_weighted_loss( + penalties, weights, scope=scope, loss_collection=loss_collection, + reduction=reduction) - if add_summaries: - summary.scalar('gradient_penalty_loss', penalty) + if add_summaries: + summary.scalar('gradient_penalty_loss', penalty) - return penalty + return penalty # Original losses from `Generative Adversarial Nets` diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py index c15ce5baae..b5cd8c92ba 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py @@ -453,10 +453,11 @@ class GradientPenaltyTest(test.TestCase, _PenaltyTest): 'discriminator_scope': self._scope, } self._expected_loss = 9.00000 - self._expected_op_name = 'weighted_loss/value' + self._expected_op_name = 'wasserstein_gradient_penalty/value' self._batch_size = 1 def _discriminator_fn(self, inputs, _): + ops.add_to_collection('fake_update_ops', constant_op.constant(1.0)) return variable_scope.get_variable('dummy_d', initializer=2.0) * inputs def test_loss_with_placeholder(self): @@ -487,6 +488,26 @@ class GradientPenaltyTest(test.TestCase, _PenaltyTest): self.assertEqual( num_vars, len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + def test_works_with_get_collection(self): + """Tests that gradient penalty works inside other scopes.""" + # We ran the discriminator once in the setup, so there should be an op + # already in the collection. + self.assertEqual(1, len(ops.get_collection( + 'fake_update_ops', self._kwargs['discriminator_scope'].name))) + + # Make sure the op is added to the collection even if it's in a name scope. 
+ with ops.name_scope('loss'): + tfgan_losses.wasserstein_gradient_penalty(**self._kwargs) + self.assertEqual(2, len(ops.get_collection( + 'fake_update_ops', self._kwargs['discriminator_scope'].name))) + + # Make sure the op is added to the collection even if it's in a variable + # scope. + with variable_scope.variable_scope('loss_vscope'): + tfgan_losses.wasserstein_gradient_penalty(**self._kwargs) + self.assertEqual(3, len(ops.get_collection( + 'fake_update_ops', self._kwargs['discriminator_scope'].name))) + class MutualInformationPenaltyTest(test.TestCase, _PenaltyTest): """Tests for mutual_information_penalty.""" -- GitLab From 485cb179ea84c8de26263628510f930d07a98c4a Mon Sep 17 00:00:00 2001 From: Neal Wu Date: Mon, 9 Oct 2017 17:23:25 -0700 Subject: [PATCH 205/909] Fix the example in the RNN tutorial which left out one of the pieces of data. PiperOrigin-RevId: 171611082 --- tensorflow/docs_src/tutorials/recurrent.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tensorflow/docs_src/tutorials/recurrent.md b/tensorflow/docs_src/tutorials/recurrent.md index 73d40575d7..3bae9bb457 100644 --- a/tensorflow/docs_src/tutorials/recurrent.md +++ b/tensorflow/docs_src/tutorials/recurrent.md @@ -51,10 +51,10 @@ The core of the model consists of an LSTM cell that processes one word at a time and computes probabilities of the possible values for the next word in the sentence. The memory state of the network is initialized with a vector of zeros and gets updated after reading each word. For computational reasons, we will -process data in mini-batches of size `batch_size`. In this example, it is important -to note that `current_batch_of_words` does not correspond to a "sentence" of words. -Every word in a batch should correspond to time t. Tensorflow will automatically sum -the gradients of each batch for you. +process data in mini-batches of size `batch_size`. 
In this example, it is +important to note that `current_batch_of_words` does not correspond to a +"sentence" of words. Every word in a batch should correspond to a time t. +TensorFlow will automatically sum the gradients of each batch for you. For example: ``` @@ -63,16 +63,17 @@ For example: [The, red, fox, jumped, high] words_in_dataset[0] = [The, The] -words_in_dataset[1] = [fox, fox] -words_in_dataset[2] = [is, jumped] -words_in_dataset[3] = [quick, high] -num_batches = 4, batch_size = 2, time_steps = 5 +words_in_dataset[1] = [brown, red] +words_in_dataset[2] = [fox, fox] +words_in_dataset[3] = [is, jumped] +words_in_dataset[4] = [quick, high] +batch_size = 2, time_steps = 5 ``` The basic pseudocode is as follows: ```python -words_in_dataset = tf.placeholder(tf.float32, [num_batches, batch_size, num_features]) +words_in_dataset = tf.placeholder(tf.float32, [time_steps, batch_size, num_features]) lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size) # Initial state of the LSTM memory. hidden_state = tf.zeros([batch_size, lstm.state_size]) -- GitLab From 07d78ddeafe41bc0363ac92efd7ca8ea60478989 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 17:31:28 -0700 Subject: [PATCH 206/909] Removes the use of tf.cond in the SweepHook used in the WALSMatrixFactorization estimator, to prevent a rare but possible race condition. 
PiperOrigin-RevId: 171612114 --- tensorflow/contrib/factorization/BUILD | 1 - .../contrib/factorization/python/ops/wals.py | 250 ++++++++---------- .../factorization/python/ops/wals_test.py | 14 +- 3 files changed, 111 insertions(+), 154 deletions(-) diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index c741815042..44095bd00a 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -246,7 +246,6 @@ tf_py_test( "manual", "noasan", # times out b/63678675 "nomsan", - "notsan", ], ) diff --git a/tensorflow/contrib/factorization/python/ops/wals.py b/tensorflow/contrib/factorization/python/ops/wals.py index 3e3ee5fa57..3976395d78 100644 --- a/tensorflow/contrib/factorization/python/ops/wals.py +++ b/tensorflow/contrib/factorization/python/ops/wals.py @@ -26,7 +26,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope @@ -38,31 +37,30 @@ from tensorflow.python.training import session_run_hook class _SweepHook(session_run_hook.SessionRunHook): """Keeps track of row/col sweeps, and runs prep ops before each sweep.""" - def __init__(self, is_row_sweep_var, train_op, num_rows, num_cols, - processed_row_indices, processed_col_indices, row_prep_ops, - col_prep_ops, cache_init_ops, completed_sweeps_var): + def __init__(self, is_row_sweep_var, train_ops, num_rows, num_cols, + input_row_indices, input_col_indices, row_prep_ops, + col_prep_ops, init_op, completed_sweeps_var): """Initializes SweepHook. Args: is_row_sweep_var: A Boolean tf.Variable, determines whether we are currently doing a row or column sweep. It is updated by the hook. - train_op: An op. 
All the ops created by the hook will have - control_dependencies on train_op. + train_ops: A list of ops. The ops created by this hook will have + control dependencies on `train_ops`. num_rows: int, the total number of rows to be processed. num_cols: int, the total number of columns to be processed. - processed_row_indices: A Tensor of type int64. The indices of the input - rows that are processed during the current sweep. All elements of - processed_row_indices must be in [0, num_rows). - processed_col_indices: A Tensor of type int64. The indices of the input + input_row_indices: A Tensor of type int64. The indices of the input rows + that are processed during the current sweep. All elements of + `input_row_indices` must be in [0, num_rows). + input_col_indices: A Tensor of type int64. The indices of the input columns that are processed during the current sweep. All elements of - processed_col_indices must be in [0, num_cols). + `input_col_indices` must be in [0, num_cols). row_prep_ops: list of ops, to be run before the beginning of each row sweep, in the given order. col_prep_ops: list of ops, to be run before the beginning of each column sweep, in the given order. - cache_init_ops: list of ops, to be run once before training, in the given - order. These are typically local initialization ops (such as cache - initialization). + init_op: op to be run once before training. This is typically a local + initialization op (such as cache initialization). completed_sweeps_var: An integer tf.Variable, indicates the number of completed sweeps. It is updated by the hook. """ @@ -70,55 +68,45 @@ class _SweepHook(session_run_hook.SessionRunHook): self._num_cols = num_cols self._row_prep_ops = row_prep_ops self._col_prep_ops = col_prep_ops - self._cache_init_ops = cache_init_ops + self._init_op = init_op self._is_row_sweep_var = is_row_sweep_var self._completed_sweeps_var = completed_sweeps_var - # Boolean variable that determines whether the cache_init_ops have been run. 
+ # Boolean variable that determines whether the init_ops have been run. self._is_initialized = False - # Boolean variable that is set to True when a sweep is completed. - # Used to run the prep_ops at the beginning of a sweep, in before_run(). - self._is_sweep_done = False - # Ops to run jointly with train_op, responsible for updating - # _is_row_sweep_var and incrementing the global_step and completed_sweeps - # counters. They have control_dependencies on train_op. - self._fetches = self._create_switch_ops(processed_row_indices, - processed_col_indices, train_op) - - def _create_switch_ops(self, processed_row_indices, processed_col_indices, - train_op): + # Ops to run jointly with train_ops, responsible for updating + # `is_row_sweep_var` and incrementing the `global_step` and + # `completed_sweeps` counters. + self._update_op, self._is_sweep_done_var, self._switch_op = ( + self._create_hook_ops(input_row_indices, input_col_indices, train_ops)) + + def _create_hook_ops(self, input_row_indices, input_col_indices, train_ops): """Creates ops to update is_row_sweep_var, global_step and completed_sweeps. - Creates two boolean tensors processed_rows and processed_cols, which keep - track of which rows/cols have been processed during the current sweep. + Creates two boolean tensors `processed_rows` and `processed_cols`, which + keep track of which rows/cols have been processed during the current sweep. Returns ops that should be run after each row / col update. - - When is_row_sweep_var is True, it sets - processed_rows[processed_row_indices] to True. - - When is_row_sweep_var is False, it sets - processed_cols[processed_col_indices] to True . - When all rows or all cols have been processed, negates is_row_sweep_var, - increments the completed_sweeps counter, and resets processed_rows and - processed_cols to False. - All of the ops created by this function have control_dependencies on - train_op. 
+ - When `self._is_row_sweep_var` is True, it sets + processed_rows[input_row_indices] to True. + - When `self._is_row_sweep_var` is False, it sets + processed_cols[input_col_indices] to True. Args: - processed_row_indices: A Tensor. The indices of the input rows that are + input_row_indices: A Tensor. The indices of the input rows that are processed during the current sweep. - processed_col_indices: A Tensor. The indices of the input columns that + input_col_indices: A Tensor. The indices of the input columns that are processed during the current sweep. - train_op: An op. All the ops created by this function have - control_dependencies on train_op. + train_ops: A list of ops. The ops created by this function have control + dependencies on `train_ops`. + Returns: - A list consisting of: - is_sweep_done: A Boolean tensor, determines whether the sweep is done, - i.e. all rows (during a row sweep) or all columns (during a column - sweep) have been processed. - switch_ops: An op that updates is_row_sweep_var when is_sweep_done is - True. Has control_dependencies on train_op. - incr_ops: An op that increments the global_step and completed_sweeps - counters. Has control_dependenciens on switch_ops. + A tuple consisting of: + update_op: An op to be run jointly with training. It updates the state + and increments counters (global step and completed sweeps). + is_sweep_done_var: A Boolean tf.Variable, specifies whether the sweep is + done, i.e. all rows (during a row sweep) or all columns (during a + column sweep) have been processed. + switch_op: An op to be run in `self.before_run` when the sweep is done. 
""" - processed_rows_init = array_ops.fill(dims=[self._num_rows], value=False) with ops.colocate_with(processed_rows_init): processed_rows = variable_scope.variable( @@ -133,97 +121,72 @@ class _SweepHook(session_run_hook.SessionRunHook): collections=[ops.GraphKeys.GLOBAL_VARIABLES], trainable=False, name="sweep_hook_processed_cols") - # After running the train_op, update processed_rows or processed_cols - # tensors, depending on whether we are currently doing a row or a col sweep - with ops.control_dependencies([train_op]): - - def get_row_update_op(): - with ops.colocate_with(processed_rows): - return state_ops.scatter_update(processed_rows, processed_row_indices, - array_ops.ones_like( - processed_row_indices, - dtype=dtypes.bool)) - - def get_col_update_op(): - with ops.colocate_with(processed_cols): - return state_ops.scatter_update(processed_cols, processed_col_indices, - array_ops.ones_like( - processed_col_indices, - dtype=dtypes.bool)) - - update_processed_op = control_flow_ops.cond( - self._is_row_sweep_var, get_row_update_op, get_col_update_op) - - # After update_processed_op, check whether we have completed a sweep. - # If this is the case, flip the is_row_sweep_var and reset processed_rows - # and processed_cols tensors. 
- with ops.control_dependencies([update_processed_op]): - - def get_switch_op(): - return state_ops.assign( - self._is_row_sweep_var, - gen_math_ops.logical_not(self._is_row_sweep_var)).op - - def get_reset_op(): - return control_flow_ops.group( - state_ops.assign(processed_rows, processed_rows_init).op, - state_ops.assign(processed_cols, processed_cols_init).op) - - is_sweep_done = control_flow_ops.cond( + switch_ops = control_flow_ops.group( + state_ops.assign( self._is_row_sweep_var, - lambda: math_ops.reduce_all(processed_rows), - lambda: math_ops.reduce_all(processed_cols), - name="sweep_hook_is_sweep_done") - switch_op = control_flow_ops.cond( - is_sweep_done, - get_switch_op, - control_flow_ops.no_op, - name="sweep_hook_switch_op") - reset_op = control_flow_ops.cond( - is_sweep_done, - get_reset_op, - control_flow_ops.no_op, - name="sweep_hook_reset_op") - switch_ops = control_flow_ops.group( - switch_op, reset_op, name="sweep_hook_switch_ops") - - with ops.control_dependencies([switch_ops]): - # Op to increment the completed_sweeps counter. - completed_sweeps_incr_op = control_flow_ops.cond( - is_sweep_done, - lambda: state_ops.assign_add(self._completed_sweeps_var, 1).op, - control_flow_ops.no_op, - name="completed_sweeps_incr") - - # Op to increment the global_step counter. 
- global_step = framework_variables.get_global_step() - if global_step is not None: - global_step_incr_op = state_ops.assign_add( - global_step, 1, name="global_step_incr").op - else: - global_step_incr_op = control_flow_ops.no_op( - name="global_step_incr") - - incr_ops = control_flow_ops.group( - completed_sweeps_incr_op, - global_step_incr_op, - name="counter_incr_ops") - - return [is_sweep_done, switch_ops, incr_ops] + math_ops.logical_not(self._is_row_sweep_var)), + state_ops.assign(processed_rows, processed_rows_init), + state_ops.assign(processed_cols, processed_cols_init)) + is_sweep_done_var = variable_scope.variable( + False, + collections=[ops.GraphKeys.GLOBAL_VARIABLES], + trainable=False, + name="is_sweep_done") + + # After running the `train_ops`, updates `processed_rows` or + # `processed_cols` tensors, depending on whether this is a row or col sweep. + with ops.control_dependencies(train_ops): + with ops.colocate_with(processed_rows): + update_processed_rows = state_ops.scatter_update( + processed_rows, + input_row_indices, + math_ops.logical_and( + self._is_row_sweep_var, + array_ops.ones_like(input_row_indices, dtype=dtypes.bool))) + with ops.colocate_with(processed_cols): + update_processed_cols = state_ops.scatter_update( + processed_cols, + input_col_indices, + math_ops.logical_and( + math_ops.logical_not(self._is_row_sweep_var), + array_ops.ones_like(input_col_indices, dtype=dtypes.bool))) + update_processed_op = control_flow_ops.group( + update_processed_rows, update_processed_cols) - def begin(self): - pass + with ops.control_dependencies([update_processed_op]): + is_sweep_done = math_ops.logical_or( + math_ops.reduce_all(processed_rows), + math_ops.reduce_all(processed_cols)) + # Increments global step. 
+ global_step = framework_variables.get_global_step() + if global_step is not None: + global_step_incr_op = state_ops.assign_add( + global_step, 1, name="global_step_incr").op + else: + global_step_incr_op = control_flow_ops.no_op() + # Increments completed sweeps. + completed_sweeps_incr_op = state_ops.assign_add( + self._completed_sweeps_var, + math_ops.cast(is_sweep_done, dtypes.int32), + use_locking=True).op + update_ops = control_flow_ops.group( + global_step_incr_op, + completed_sweeps_incr_op, + state_ops.assign(is_sweep_done_var, is_sweep_done)) + + return update_ops, is_sweep_done_var, switch_ops def before_run(self, run_context): """Runs the appropriate prep ops, and requests running update ops.""" - # Run the appropriate cache_init and prep ops + # Runs the appropriate init ops and prep ops. sess = run_context.session + is_sweep_done = sess.run(self._is_sweep_done_var) if not self._is_initialized: - logging.info("SweepHook running cache init ops.") - for init_op in self._cache_init_ops: - sess.run(init_op) - - if self._is_sweep_done or not self._is_initialized: + logging.info("SweepHook running cache init op.") + sess.run(self._init_op) + if is_sweep_done: + sess.run(self._switch_op) + if is_sweep_done or not self._is_initialized: logging.info("SweepHook running sweep prep ops.") row_sweep = sess.run(self._is_row_sweep_var) prep_ops = self._row_prep_ops if row_sweep else self._col_prep_ops @@ -232,13 +195,12 @@ class _SweepHook(session_run_hook.SessionRunHook): self._is_initialized = True - # Request running the switch_ops and the incr_ops - logging.info("Partial fit starting.") - return session_run_hook.SessionRunArgs(fetches=self._fetches) + # Requests running `self._update_op` jointly with the training op. 
+ logging.info("Next fit step starting.") + return session_run_hook.SessionRunArgs(fetches=[self._update_op]) def after_run(self, run_context, run_values): - self._is_sweep_done = run_values.results[0] - logging.info("Partial fit done.") + logging.info("Fit step done.") class _StopAtSweepHook(session_run_hook.SessionRunHook): @@ -360,19 +322,19 @@ def _wals_factorization_model_function(features, labels, mode, params): col_prep_ops = [ model.col_update_prep_gramian_op, model.initialize_col_update_op ] - cache_init_ops = [model.worker_init] + init_ops = [model.worker_init] sweep_hook = _SweepHook( is_row_sweep_var, - train_op, + [train_op, loss], params["num_rows"], params["num_cols"], input_row_indices, input_col_indices, row_prep_ops, col_prep_ops, - cache_init_ops, - completed_sweeps_var,) + init_ops, + completed_sweeps_var) training_hooks = [sweep_hook] if max_sweeps is not None: training_hooks.append(_StopAtSweepHook(max_sweeps)) diff --git a/tensorflow/contrib/factorization/python/ops/wals_test.py b/tensorflow/contrib/factorization/python/ops/wals_test.py index b5c1bb1151..8bd72b7025 100644 --- a/tensorflow/contrib/factorization/python/ops/wals_test.py +++ b/tensorflow/contrib/factorization/python/ops/wals_test.py @@ -357,7 +357,7 @@ class WALSMatrixFactorizationTest(test.TestCase): self.assertNear( loss, true_loss, err=.001, - msg="""After row update, eval loss = {}, does not match the true + msg="""After col update, eval loss = {}, does not match the true loss = {}.""".format(loss, true_loss)) @@ -442,7 +442,7 @@ class SweepHookTest(test.TestCase): completed_sweeps_var = variables.Variable(0) sweep_hook = wals_lib._SweepHook( is_row_sweep_var, - self._train_op, + [self._train_op], self._num_rows, self._num_cols, self._input_row_indices_ph, @@ -465,11 +465,9 @@ class SweepHookTest(test.TestCase): 'False.') # Row sweep completed. 
mon_sess.run(self._train_op, ind_feed([3, 4], [0, 1, 2, 3, 4, 5, 6])) - self.assertFalse(sess.run(is_row_sweep_var), - msg='Row sweep is complete but is_row_sweep is True.') self.assertTrue(sess.run(completed_sweeps_var) == 1, msg='Completed sweeps should be equal to 1.') - self.assertTrue(sweep_hook._is_sweep_done, + self.assertTrue(sess.run(sweep_hook._is_sweep_done_var), msg='Sweep is complete but is_sweep_done is False.') # Col init ops should run. Col sweep not completed. mon_sess.run(self._train_op, ind_feed([], [0, 1, 2, 3, 4])) @@ -478,13 +476,11 @@ class SweepHookTest(test.TestCase): self.assertFalse(sess.run(is_row_sweep_var), msg='Col sweep is not complete but is_row_sweep is ' 'True.') - self.assertFalse(sweep_hook._is_sweep_done, + self.assertFalse(sess.run(sweep_hook._is_sweep_done_var), msg='Sweep is not complete but is_sweep_done is True.') # Col sweep completed. mon_sess.run(self._train_op, ind_feed([], [4, 5, 6])) - self.assertTrue(sess.run(is_row_sweep_var), - msg='Col sweep is complete but is_row_sweep is False') - self.assertTrue(sweep_hook._is_sweep_done, + self.assertTrue(sess.run(sweep_hook._is_sweep_done_var), msg='Sweep is complete but is_sweep_done is False.') self.assertTrue(sess.run(completed_sweeps_var) == 2, msg='Completed sweeps should be equal to 2.') -- GitLab From 2cdd0647e08c1dc7948f70416ee8311c09598e59 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 17:49:32 -0700 Subject: [PATCH 207/909] Make error message more explicit when running FusedConv2DBiasActivationOp with type int8 on a GPU that doesn't support it. Old error message: "No algorithm worked!" New error message: "FusedConv2DBiasActivation is only supported on GPUs with compute capability 6.1 or later." 
PiperOrigin-RevId: 171614032 --- .../kernels/fused_conv2d_bias_activation_op.cc | 11 +++++++++++ tensorflow/stream_executor/cuda/cuda_dnn.cc | 13 ++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc index 256f200868..e4c39739f7 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc @@ -298,6 +298,17 @@ void LaunchFusedConv2DBiasActivationOp:: constexpr int rank = is_int8x4 ? 5 : 4; constexpr int vect = is_int8x4 ? 4 : 1; + if (is_int8x4) { + int cc_major, cc_minor; + stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major, + &cc_minor); + OP_REQUIRES( + ctx, cc_major >= 6 && cc_minor >= 1, + errors::Unimplemented( + "FusedConv2DBiasActivation for int8 is only supported on GPUs with " + "compute capability 6.1 or later.")); + } + const int batch_size = GetTensorDim(conv_input_param, data_format, 'N'); int conv_input_rows = GetTensorDim(conv_input_param, data_format, 'H'); int conv_input_cols = GetTensorDim(conv_input_param, data_format, 'W'); diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 0a1a748c40..46516cc445 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -2864,10 +2864,18 @@ bool CudnnSupport::DoFusedConvolve( const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { #if CUDNN_VERSION < 6000 - LOG(ERROR) << "cudnnConvolutionBiasActivationForward() is only " - "supported for cuDNN version >= 6"; + LOG(WARNING) << "cudnnConvolutionBiasActivationForward() is only " + "supported for cuDNN version >= 6"; return false; #else + int cc_major, cc_minor; + 
stream->parent()->GetDeviceDescription().cuda_compute_capability(&cc_major, + &cc_minor); + if (cc_major < 6 || (cc_major == 6 && cc_minor < 1)) { + LOG(WARNING) << "cudnnConvolutionBiasActivationForward() for int8 is only " + "supported on GPUs with compute capability 6.1 or later."; + return false; + } return DoFusedConvolveImpl( stream, conv_input_descriptor, conv_input_data, conv_input_scale, @@ -2875,7 +2883,6 @@ bool CudnnSupport::DoFusedConvolve( side_input_scale, bias_descriptor, biases, activation_mode, output_descriptor, output_data, scratch_allocator, algorithm_config, output_profile_result); - return true; #endif } -- GitLab From cd37dbb8d8cdf1c8ae70f3aa8f588b85ce00a0ec Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 18:22:25 -0700 Subject: [PATCH 208/909] Benchmark for LSTMBlockCell's forward propagation. PiperOrigin-RevId: 171616821 --- tensorflow/contrib/rnn/BUILD | 11 ++ .../rnn/python/kernel_tests/benchmarking.py | 66 ++++++++ .../rnn/python/kernel_tests/gru_ops_test.py | 157 +++++++++--------- .../rnn/python/kernel_tests/lstm_ops_test.py | 52 ++++++ 4 files changed, 211 insertions(+), 75 deletions(-) create mode 100644 tensorflow/contrib/rnn/python/kernel_tests/benchmarking.py diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD index 3e6c09662f..7dc76cf622 100644 --- a/tensorflow/contrib/rnn/BUILD +++ b/tensorflow/contrib/rnn/BUILD @@ -42,6 +42,7 @@ tf_custom_op_py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ + ":benchmarking", ":gru_ops", ":lstm_ops", "//tensorflow/contrib/compiler:compiler_py", @@ -386,3 +387,13 @@ py_test( "//tensorflow/python:variables", ], ) + +py_library( + name = "benchmarking", + srcs = ["python/kernel_tests/benchmarking.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:framework_ops", + "//third_party/py/numpy", + ], +) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/benchmarking.py 
b/tensorflow/contrib/rnn/python/kernel_tests/benchmarking.py new file mode 100644 index 0000000000..a48cd58706 --- /dev/null +++ b/tensorflow/contrib/rnn/python/kernel_tests/benchmarking.py @@ -0,0 +1,66 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Library for benchmarking OpKernels.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools +import time + +from tensorflow.python.framework import ops + + +def device(use_gpu=False): + """TensorFlow device to assign ops to.""" + if use_gpu: + return ops.device("/gpu:0") + return ops.device("/cpu:0") + + +def seconds_per_run(op, sess, num_runs=50): + """Number of seconds taken to execute 'op' once on average.""" + for _ in range(2): + sess.run(op) + + start_time = time.time() + for _ in range(num_runs): + sess.run(op) + + end_time = time.time() + time_taken = (end_time - start_time) / num_runs + return time_taken + + +def dict_product(dicts): + """Constructs iterator over outer product of entries in a dict-of-lists. + + Example: + >>> dict_products({"a": [1,2], "b": [3, 4]}) + >>> [{"a": 1, "b": 3}, + {"a": 1, "b": 4}, + {"a": 2, "b": 3}, + {"a": 2, "b": 4}] + + Args: + dicts: dictionary with string keys and list values. + + Yields: + Individual dicts from outer product. 
+ """ + keys, values = zip(*dicts.items()) + for config_values in itertools.product(*values): + yield dict(zip(keys, config_values)) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py index 4239e32ab9..b865466cc7 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py @@ -18,10 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import time - import numpy as np +from tensorflow.contrib.rnn.python.kernel_tests import benchmarking from tensorflow.contrib.rnn.python.ops import gru_ops from tensorflow.python.client import session from tensorflow.python.framework import dtypes @@ -333,20 +332,6 @@ class GRUBlockCellTest(test.TestCase): #### Benchmarking GRUBlockCell vs GRUCell. -def time_taken_by_op(op, sess, num_runs=50): - """Time taken by the Op.""" - for _ in range(2): - sess.run([op]) - - start_time = time.time() - for _ in range(num_runs): - sess.run([op]) - - end_time = time.time() - time_taken = end_time - start_time - return time_taken - - def training_gru_block_vs_gru_cell(batch_size, cell_size, input_size, @@ -357,7 +342,7 @@ def training_gru_block_vs_gru_cell(batch_size, ops.reset_default_graph() with session.Session(graph=ops.Graph()) as sess: # Specify the device which is been used. - with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"): + with benchmarking.device(use_gpu): # Random initializers. seed = 1994 @@ -387,7 +372,8 @@ def training_gru_block_vs_gru_cell(batch_size, learning_rate).minimize(cost) # time for a training step. - basic_time_training = time_taken_by_op(optimizer, sess, iters) + basic_time_training = benchmarking.seconds_per_run( + optimizer, sess, iters) # Output from the basic GRU cell implementation. 
with vs.variable_scope("block", initializer=initializer): @@ -406,7 +392,8 @@ def training_gru_block_vs_gru_cell(batch_size, learning_rate).minimize(cost) # time for a training step. - block_time_training = time_taken_by_op(optimizer, sess, iters) + block_time_training = benchmarking.seconds_per_run( + optimizer, sess, iters) performance_training = ( basic_time_training - block_time_training) * 100 / basic_time_training @@ -429,7 +416,7 @@ def inference_gru_block_vs_gru_cell(batch_size, """Benchmark inference speed between GRUBlockCell vs GRUCell.""" ops.reset_default_graph() with session.Session(graph=ops.Graph()) as sess: - with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"): + with benchmarking.device(use_gpu): # Random initializers. seed = 1994 @@ -451,7 +438,8 @@ def inference_gru_block_vs_gru_cell(batch_size, time_major=True, dtype=dtypes.float32) sess.run([variables.global_variables_initializer()]) - basic_time_inference = time_taken_by_op(outputs_dynamic, sess, iters) + basic_time_inference = benchmarking.seconds_per_run( + outputs_dynamic, sess, iters) # Output from the block GRU cell implementation. 
with vs.variable_scope("block", initializer=initializer): @@ -463,7 +451,8 @@ def inference_gru_block_vs_gru_cell(batch_size, time_major=True, dtype=dtypes.float32) sess.run([variables.global_variables_initializer()]) - block_time_inference = time_taken_by_op(outputs_dynamic, sess, iters) + block_time_inference = benchmarking.seconds_per_run( + outputs_dynamic, sess, iters) performance_inference = (basic_time_inference - block_time_inference ) * 100 / basic_time_inference @@ -484,7 +473,7 @@ def single_bprop_step_gru_block_vs_gru_cell(batch_size, """Benchmark single bprop step speed between GRUBlockCell vs GRUCell.""" ops.reset_default_graph() with session.Session(graph=ops.Graph()) as sess: - with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"): + with benchmarking.device(use_gpu): initializer = init_ops.random_uniform_initializer(-1, 1, seed=1989) # Inputs x = vs.get_variable("x", [batch_size, input_size]) @@ -496,7 +485,8 @@ def single_bprop_step_gru_block_vs_gru_cell(batch_size, array_ops.identity(h)) sess.run([variables.global_variables_initializer()]) grad_output_wrt_input = gradients_impl.gradients([output], h) - basic_time_bprop = time_taken_by_op(grad_output_wrt_input, sess, iters) + basic_time_bprop = benchmarking.seconds_per_run(grad_output_wrt_input, + sess, iters) # Output from the block GRU cell implementation. 
with vs.variable_scope("block", initializer=initializer): @@ -504,7 +494,8 @@ def single_bprop_step_gru_block_vs_gru_cell(batch_size, array_ops.identity(h)) sess.run([variables.global_variables_initializer()]) grad_output_wrt_input = gradients_impl.gradients([output], h) - block_time_bprop = time_taken_by_op(grad_output_wrt_input, sess, iters) + block_time_bprop = benchmarking.seconds_per_run(grad_output_wrt_input, + sess, iters) performance_inference = ( basic_time_bprop - block_time_bprop) * 100 / basic_time_bprop @@ -526,23 +517,29 @@ class BenchmarkGRUBlock(test.Benchmark): print("batch_size, cell_size, input_size, time_steps, GPU, " "basic_time_training, block_time_training, performance_training[%]") iters = 10 - for use_gpu in [True, False]: - for batch_size in [1, 32, 128]: - for cell_size in [128, 512]: - for input_size in [128, 512]: - for time_steps in [50]: - basic_time, block_time = training_gru_block_vs_gru_cell( - batch_size, cell_size, input_size, time_steps, use_gpu, iters) - self.report_benchmark( - name="GRUCell_training_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" % - (batch_size, cell_size, input_size, time_steps, use_gpu), - iters=iters, - wall_time=basic_time) - self.report_benchmark( - name="GRUBlockCell_training_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" % - (batch_size, cell_size, input_size, time_steps, use_gpu), - iters=iters, - wall_time=block_time) + + for config in benchmarking.dict_product({ + "use_gpu": [True, False], + "batch_size": [1, 32, 128], + "cell_size": [128, 512], + "input_size": [128, 512], + "time_steps": [50] + }): + basic_time, block_time = training_gru_block_vs_gru_cell( + config["batch_size"], config["cell_size"], config["input_size"], + config["time_steps"], config["use_gpu"], iters) + self.report_benchmark( + name="GRUCell_training_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" % + (config["batch_size"], config["cell_size"], config["input_size"], + config["time_steps"], config["use_gpu"]), + iters=iters, + wall_time=basic_time) + 
self.report_benchmark( + name="GRUBlockCell_training_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" % + (config["batch_size"], config["cell_size"], config["input_size"], + config["time_steps"], config["use_gpu"]), + iters=iters, + wall_time=block_time) def benchmarkInferenceBlockGRUVsGRUCell(self): print("--------------------------------------------------------------") @@ -551,23 +548,28 @@ class BenchmarkGRUBlock(test.Benchmark): "batch_size, cell_size, input_size, time_steps, GPU, " "basic_time_inference, block_time_inference, performance_inference[%]") iters = 10 - for use_gpu in [True, False]: - for batch_size in [1, 32, 128]: - for cell_size in [128, 512]: - for input_size in [128, 512]: - for time_steps in [50]: - basic_time, block_time = inference_gru_block_vs_gru_cell( - batch_size, cell_size, input_size, time_steps, use_gpu, iters) - self.report_benchmark( - name="GRUCell_inference_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" % - (batch_size, cell_size, input_size, time_steps, use_gpu), - iters=iters, - wall_time=basic_time) - self.report_benchmark( - name="GRUBlockCell_inference_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" - % (batch_size, cell_size, input_size, time_steps, use_gpu), - iters=iters, - wall_time=block_time) + for config in benchmarking.dict_product({ + "use_gpu": [True, False], + "batch_size": [1, 32, 128], + "cell_size": [128, 512], + "input_size": [128, 512], + "time_steps": [50] + }): + basic_time, block_time = inference_gru_block_vs_gru_cell( + config["batch_size"], config["cell_size"], config["input_size"], + config["time_steps"], config["use_gpu"], iters) + self.report_benchmark( + name="GRUCell_inference_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" % + (config["batch_size"], config["cell_size"], config["input_size"], + config["time_steps"], config["use_gpu"]), + iters=iters, + wall_time=basic_time) + self.report_benchmark( + name="GRUBlockCell_inference_time_BS%i_CS%i_IS%i_TS%i_gpu_%s" % + (config["batch_size"], config["cell_size"], config["input_size"], + config["time_steps"], 
config["use_gpu"]), + iters=iters, + wall_time=block_time) def benchmarkSingleBpropStepBlockGRUVsGRUCell(self): print("--------------------------------------------------------------") @@ -575,22 +577,27 @@ class BenchmarkGRUBlock(test.Benchmark): print("batch_size, cell_size, input_size, GPU, basic_time, " "block_time, performance_inference[%]") iters = 10 - for use_gpu in [True, False]: - for batch_size in [1, 32, 128]: - for cell_size in [128, 512]: - for input_size in [128, 512]: - basic_time, block_time = single_bprop_step_gru_block_vs_gru_cell( - batch_size, cell_size, input_size, use_gpu, iters) - self.report_benchmark( - name="GRUCell_Bprop_single_step_time_BS%i_CS%i_IS%i_gpu_%s" % - (batch_size, cell_size, input_size, use_gpu), - iters=iters, - wall_time=basic_time) - self.report_benchmark( - name="GRUBlockCell_Bprop_single_step_time_BS%i_CS%i_IS%i_gpu_%s" - % (batch_size, cell_size, input_size, use_gpu), - iters=iters, - wall_time=block_time) + for config in benchmarking.dict_product({ + "use_gpu": [True, False], + "batch_size": [1, 32, 128], + "cell_size": [128, 512], + "input_size": [128, 512] + }): + basic_time, block_time = single_bprop_step_gru_block_vs_gru_cell( + config["batch_size"], config["cell_size"], config["input_size"], + config["use_gpu"], iters) + self.report_benchmark( + name="GRUCell_Bprop_single_step_time_BS%i_CS%i_IS%i_gpu_%s" % + (config["batch_size"], config["cell_size"], config["input_size"], + config["use_gpu"]), + iters=iters, + wall_time=basic_time) + self.report_benchmark( + name="GRUBlockCell_Bprop_single_step_time_BS%i_CS%i_IS%i_gpu_%s" % + (config["batch_size"], config["cell_size"], config["input_size"], + config["use_gpu"]), + iters=iters, + wall_time=block_time) print("--------------------------------------------------------------") diff --git a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py index 0ec37411f5..3016821b74 100644 --- 
a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py @@ -20,7 +20,9 @@ from __future__ import print_function import numpy as np +from tensorflow.contrib.rnn.python.kernel_tests import benchmarking from tensorflow.contrib.rnn.python.ops import lstm_ops +from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -467,6 +469,56 @@ class LSTMBlockCellTest(test.TestCase): for basic, unfused in zip(basic_wgrads, unfused_wgrads): self.assertAllClose(basic, unfused, rtol=1e-2, atol=1e-2) +#### Benchmarking. + + +class BenchmarkLSTMBlock(test.Benchmark): + + def benchmarkLSTMBlockCellFpropWithDynamicRNN(self): + print("BlockLSTMCell forward propagation via dynamic_rnn().") + print("--------------------------------------------------------------") + print("LSTMBlockCell Seconds per inference.") + print("batch_size,cell_size,input_size,time_steps,use_gpu,wall_time") + iters = 10 + for config in benchmarking.dict_product({ + "batch_size": [1, 32, 128], + "cell_size": [32, 128, 512], + "input_size": [128, 512], + "time_steps": [10, 25, 100], + "use_gpu": [True, False] + }): + with ops.Graph().as_default(): + with benchmarking.device(use_gpu=config["use_gpu"]): + inputs = variable_scope.get_variable("x", [ + config["time_steps"], config["batch_size"], config["input_size"] + ]) + cell = lstm_ops.LSTMBlockCell(config["cell_size"]) + outputs = rnn.dynamic_rnn( + cell, inputs, time_major=True, dtype=dtypes.float32) + init_op = variables.global_variables_initializer() + + with session.Session() as sess: + sess.run(init_op) + wall_time = benchmarking.seconds_per_run(outputs, sess, iters) + + # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment variable + # is set, this will produce a copy-paste-able CSV file. 
+ print(",".join( + map(str, [ + config["batch_size"], config["cell_size"], config["input_size"], + config["time_steps"], config["use_gpu"], wall_time + ]))) + benchmark_name_template = "_".join([ + "LSTMBlockCell_fprop", "BS%(batch_size)i", "CS%(cell_size)i", + "IS%(input_size)i", "TS%(time_steps)i", "gpu_%(use_gpu)s" + ]) + + self.report_benchmark( + name=benchmark_name_template % config, + iters=iters, + wall_time=wall_time, + extras=config) + if __name__ == "__main__": test.main() -- GitLab From 103d383a6c73363d16034c57fa7da6aea7876912 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 18:41:05 -0700 Subject: [PATCH 209/909] Add scaled_softplus to the documented symbols so it can be accessed as tf.contrib.nn.scaled_softplus. PiperOrigin-RevId: 171618233 --- tensorflow/contrib/nn/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py index be0957f473..7007e26bac 100644 --- a/tensorflow/contrib/nn/__init__.py +++ b/tensorflow/contrib/nn/__init__.py @@ -19,6 +19,7 @@ @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits @@deprecated_flipped_sigmoid_cross_entropy_with_logits @@rank_sampled_softmax_loss +@@scaled_softplus """ from __future__ import absolute_import -- GitLab From d08cb107e6eeedd74c44f0d3654753b141cfa645 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 18:59:24 -0700 Subject: [PATCH 210/909] Scheduler exports tensor size info to RunMetadata. In addition, tensor size histogram is printed out optionally (use vmodule=analytical_cost_estimator=1 or 2). 
PiperOrigin-RevId: 171619454 --- .../costs/analytical_cost_estimator.cc | 14 +- tensorflow/core/grappler/costs/utils.cc | 164 ++++++++++++++++++ tensorflow/core/grappler/costs/utils.h | 48 +++++ tensorflow/core/grappler/costs/utils_test.cc | 113 ++++++++++++ .../core/grappler/costs/virtual_scheduler.cc | 71 ++++++-- .../core/grappler/costs/virtual_scheduler.h | 2 +- .../grappler/costs/virtual_scheduler_test.cc | 10 +- 7 files changed, 395 insertions(+), 27 deletions(-) diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator.cc b/tensorflow/core/grappler/costs/analytical_cost_estimator.cc index 91b6686971..ca66f7c75a 100644 --- a/tensorflow/core/grappler/costs/analytical_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/analytical_cost_estimator.cc @@ -102,12 +102,20 @@ Status AnalyticalCostEstimator::PredictCosts(const GraphDef& optimized_graph, } } while (scheduler.MarkCurrNodeExecuted(node_costs)); - *costs = scheduler.Summary(); + RunMetadata run_metadata; + *costs = scheduler.Summary(&run_metadata); VLOG(1) << inaccurate_nodes.size() << " out of " << optimized_graph.node_size() << " nodes have inaccurate time estimation"; - for (const auto& node : inaccurate_nodes) { - VLOG(2) << "Node with inaccurate time estimation: " << node; + if (VLOG_IS_ON(3)) { + for (const auto& node : inaccurate_nodes) { + VLOG(4) << "Node with inaccurate time estimation: " << node; + } + } + + if (VLOG_IS_ON(1)) { + bool verbosity = VLOG_IS_ON(2); + VLOG(1) << GetStatsStringFromRunMetadata(run_metadata, verbosity); } return Status::OK(); } diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index ff65aca13d..1504d6b74b 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -26,21 +26,27 @@ limitations under the License. 
#include "cuda/include/cudnn.h" #endif +#include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/step_stats.pb.h" #include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_description.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/tensor_id.h" #include "tensorflow/core/grappler/clusters/utils.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { @@ -291,5 +297,163 @@ OpPerformanceList CostGraphToOpPerformanceData(const CostGraphDef& cost_graph, return ret; } +void TensorSizeHistogram::Add(const uint64 value) { + num_elem_++; + sum_elem_ += value; + min_ = std::min(min_, value); + max_ = std::max(max_, value); + buckets_[Index(value)]++; +} + +void TensorSizeHistogram::Merge(const TensorSizeHistogram& src) { + num_elem_ += src.num_elem_; + sum_elem_ += src.sum_elem_; + min_ = std::min(min_, src.min_); + max_ = std::max(max_, src.max_); + std::transform(buckets_.begin(), buckets_.end(), src.buckets_.begin(), + buckets_.begin(), std::plus()); +} + +std::string TensorSizeHistogram::ToString() const { + std::string r; + char buf[200]; + snprintf(buf, sizeof(buf), "Count: %lld, Average: ", num_elem_); + r.append(buf); + r.append(strings::HumanReadableNumBytes(Average())); + r.append(", 
Min: "); + r.append(strings::HumanReadableNumBytes(min_)); + r.append(", Max: "); + r.append(strings::HumanReadableNumBytes(max_)); + r.append("\n------------------------------------------------------\n"); + const double mult = num_elem_ > 0 ? 100.0 / num_elem_ : 0.0; + uint64 cumul_sum = 0; + + const int size_string_width = 12; + for (int i = 0; i < buckets_.size(); i++) { + if (buckets_[i] == 0) continue; + cumul_sum += buckets_[i]; + r.append("[ "); + if (i == 0) { + r.append(size_string_width - 2, ' '); + r.append("0B"); + } else { + uint64 left = 1ULL << (i - 1); + const auto left_string = strings::HumanReadableNumBytes(left); + r.append(size_string_width - left_string.size(), ' '); + r.append(left_string); + } + r.append(", "); + uint64 right = 1ULL << i; + const auto right_string = strings::HumanReadableNumBytes(right); + r.append(size_string_width - right_string.size(), ' '); + r.append(right_string); + snprintf(buf, sizeof(buf), ") %7lld %7.3f%% %7.3f%% ", + buckets_[i], // count + mult * buckets_[i], // percentage + mult * cumul_sum); // cum percentage + r.append(buf); + + // Add hash marks based on percentage; 40 marks for 100%. + auto marks = static_cast( + (static_cast(40 * buckets_[i] + (num_elem_ >> 1)) / num_elem_)); + r.append(marks, '#'); + r.push_back('\n'); + } + return r; +} + +const int TensorSizeHistogram::Index(const uint64 value) const { + // Log2Floor64 returns -1 for 0, 0 for 1, 1 for 2-3, 2 for 4-7, ... + const auto index = Log2Floor64(value) + 1; + return std::min(index, kMaxBuckets - 1); +} + +string GetDeviceClassForNonChannelDevice(const string& device_name) { + DeviceNameUtils::ParsedName parsed_name; + bool parsed = DeviceNameUtils::ParseFullName(device_name, &parsed_name); + if (parsed) { + const string& jobname = parsed_name.has_job ? 
parsed_name.job : ""; + return strings::StrCat("/", jobname, "/", parsed_name.type); + } else { + return "Unclassified"; + } +} + +string GetDeviceClass(const string& device_name) { + // TODO(dyoon): channel device name follows the convention we currently have + // in VirtualScheduler. This should be revised with VirtualScheduler as well + // as VirtualPlacer in the future. + if (device_name.find("Channel") != string::npos) { + const string from = " from "; + const string to = " to "; + const auto from_loc = device_name.find(from); + const auto to_loc = device_name.find(to); + const auto src_device_full = device_name.substr( + from_loc + from.size(), to_loc - (from_loc + from.size())); + const auto dst_device_full = device_name.substr(to_loc + to.size()); + return strings::StrCat( + "Channel", ": ", GetDeviceClassForNonChannelDevice(src_device_full), + " -> ", GetDeviceClassForNonChannelDevice(dst_device_full)); + } else { + return GetDeviceClassForNonChannelDevice(device_name); + } +} + +string GetStatsStringFromRunMetadata(const RunMetadata& run_metadata, + bool verbosity) { + // TODO(dyoon): print out other stats as needed. + std::ostringstream output; + + // Tensor size histogram: + // if verbosity, it outputs per-device histogram, + // otherwise, only per-class histogram. + std::unordered_map device_to_hist_map; + const auto& step_stats = run_metadata.step_stats(); + for (const auto& dev_stat : step_stats.dev_stats()) { + const auto& device_name = dev_stat.device(); + auto& hist = device_to_hist_map[device_name]; + for (const auto& node_stat : dev_stat.node_stats()) { + for (const auto& node_output : node_stat.output()) { + // TODO(dyoon): Calculate tensor size from tensor_description's dtype + // and shape, instead of using optional allocation_description. 
+ const auto size = node_output.tensor_description() + .allocation_description() + .allocated_bytes(); + hist.Add(size); + } + } + } + if (verbosity) { + output << "\n"; + output << "Per device tensor size histogram.\n"; + } + + std::unordered_map device_class_to_hist_map; + for (const auto& device_hist : device_to_hist_map) { + const auto& device_name = device_hist.first; + const auto& hist = device_hist.second; + if (verbosity) { + output << "Device: " << device_name << "\n" << hist.ToString() << "\n"; + } + const auto device_class = GetDeviceClass(device_name); + auto it = device_class_to_hist_map.find(device_class); + if (it == device_class_to_hist_map.end()) { + device_class_to_hist_map.emplace(device_class, TensorSizeHistogram(hist)); + } else { + it->second.Merge(hist); + } + } + output << "\n"; + output << "Aggregated per device / channel type tensor size histogram:\n"; + for (const auto& device_hist : device_class_to_hist_map) { + const auto& device_name = device_hist.first; + const auto& hist = device_hist.second; + output << "Device: " << device_name << "\n" << hist.ToString() << "\n"; + } + output << "\n"; + + return output.str(); +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/utils.h b/tensorflow/core/grappler/costs/utils.h index 96f2935951..409f07b28b 100644 --- a/tensorflow/core/grappler/costs/utils.h +++ b/tensorflow/core/grappler/costs/utils.h @@ -27,6 +27,7 @@ limitations under the License. 
#include "tensorflow/core/graph/types.h" #include "tensorflow/core/grappler/costs/op_performance_data.pb.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/protobuf/device_properties.pb.h" namespace tensorflow { @@ -60,6 +61,53 @@ OpInfo BuildOpInfoWithoutDevice( OpPerformanceList CostGraphToOpPerformanceData(const CostGraphDef& cost_graph, const GraphDef& graph); +// Simple histogram for profiling Tensor size; histogram uses logarithmic +// buckets. +class TensorSizeHistogram { + public: + TensorSizeHistogram() : buckets_(kMaxBuckets, 0) {} + + void Add(const uint64 value); + void Merge(const TensorSizeHistogram& src); + double Average() const { + if (num_elem_ > 0) { + return static_cast(sum_elem_) / num_elem_; + } else { + return 0.0; + } + } + uint64 Min() const { return min_; } + uint64 Max() const { return max_; } + uint64 NumElem() const { return num_elem_; } + uint64 SumElem() const { return sum_elem_; } + std::string ToString() const; + + protected: + const int Index(const uint64 value) const; + const std::vector& GetBuckets() const { return buckets_; } + + private: + const int kMaxBuckets = 64; + uint64 num_elem_ = 0; + uint64 sum_elem_ = 0; + // min_ and max_ are initialized to a very large value and zero, respectively, + // so that any value added can replace the initial min_ and max_. + uint64 min_ = kuint64max; + uint64 max_ = 0; + // Buckets are logarithmic: + // 0B, 1B, 2-3B, 4-7B, 8-15B, ..., 2^N - 2^(N+1)-1B, ... + std::vector buckets_; +}; + +// Helper functions for aggregating per-device stats into per-device-class +// stats. +string GetDeviceClassForNonChannelDevice(const string& device_name); +string GetDeviceClass(const string& device_name); + +// Get stats in string format from RunMetadata. 
+string GetStatsStringFromRunMetadata(const RunMetadata& run_metadata, + bool verbosity); + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/utils_test.cc b/tensorflow/core/grappler/costs/utils_test.cc index 00cd967fc8..bd0af79029 100644 --- a/tensorflow/core/grappler/costs/utils_test.cc +++ b/tensorflow/core/grappler/costs/utils_test.cc @@ -172,5 +172,118 @@ TEST_F(UtilsTest, TestSkipControlInput) { EXPECT_TRUE(node_found); } +// Class for testing TensorSizeHistogram. +class TestTensorSizeHistogram : public TensorSizeHistogram { + public: + FRIEND_TEST(TensorSizeHistogramTest, Constructor); + FRIEND_TEST(TensorSizeHistogramTest, Index); + FRIEND_TEST(TensorSizeHistogramTest, Add); + FRIEND_TEST(TensorSizeHistogramTest, Merge); +}; + +TEST(TensorSizeHistogramTest, Constructor) { + TestTensorSizeHistogram hist; + EXPECT_EQ(0, hist.NumElem()); + EXPECT_EQ(0, hist.SumElem()); + EXPECT_LT(1000000000, hist.Min()); // Initially, min_ is a very large value. 
+ EXPECT_EQ(0, hist.Max()); + EXPECT_EQ(0.0, hist.Average()); + const auto& buckets = hist.GetBuckets(); + for (const auto& bucket : buckets) { + EXPECT_EQ(0, bucket); + } +} + +TEST(TensorSizeHistogramTest, Index) { + TestTensorSizeHistogram hist; + EXPECT_EQ(0, hist.Index(0)); + EXPECT_EQ(1, hist.Index(1)); + EXPECT_EQ(2, hist.Index(2)); + EXPECT_EQ(2, hist.Index(3)); + EXPECT_EQ(3, hist.Index(4)); + EXPECT_EQ(3, hist.Index(5)); + EXPECT_EQ(3, hist.Index(6)); + EXPECT_EQ(3, hist.Index(7)); + EXPECT_EQ(4, hist.Index(8)); + EXPECT_EQ(4, hist.Index(15)); + EXPECT_EQ(5, hist.Index(16)); + EXPECT_EQ(5, hist.Index(31)); + EXPECT_EQ(6, hist.Index(32)); + EXPECT_EQ(11, hist.Index(1025)); +} + +TEST(TensorSizeHistogramTest, Add) { + TestTensorSizeHistogram hist; + hist.Add(1037); + hist.Add(1038); + hist.Add(1039); + + const auto& buckets = hist.GetBuckets(); + EXPECT_EQ(3, hist.NumElem()); + EXPECT_EQ(1037 + 1038 + 1039, hist.SumElem()); + EXPECT_DOUBLE_EQ(1038.0, hist.Average()); + EXPECT_EQ(1037, hist.Min()); + EXPECT_EQ(1039, hist.Max()); + EXPECT_EQ(3, buckets.at(11)); +} + +TEST(TensorSizeHistogramTest, Merge) { + TestTensorSizeHistogram hist1; + const auto& buckets = hist1.GetBuckets(); + hist1.Add(1037); + hist1.Add(1038); + hist1.Add(1039); + + TestTensorSizeHistogram hist2(hist1); + hist1.Merge(hist2); + EXPECT_EQ(6, hist1.NumElem()); + EXPECT_EQ(2 * (1037 + 1038 + 1039), hist1.SumElem()); + EXPECT_DOUBLE_EQ(1038.0, hist1.Average()); + EXPECT_EQ(1037, hist1.Min()); + EXPECT_EQ(1039, hist1.Max()); + EXPECT_EQ(6, buckets.at(11)); + + TestTensorSizeHistogram hist3; + hist3.Add(1); + hist3.Add(2); + hist3.Add(4); + + hist1.Merge(hist3); + EXPECT_EQ(9, hist1.NumElem()); + EXPECT_EQ(2 * (1037 + 1038 + 1039) + 1 + 2 + 4, hist1.SumElem()); + EXPECT_DOUBLE_EQ((2 * (1037 + 1038 + 1039) + 1 + 2 + 4) / 9.0, + hist1.Average()); + EXPECT_EQ(1, hist1.Min()); + EXPECT_EQ(1039, hist1.Max()); + EXPECT_EQ(1, buckets.at(1)); + EXPECT_EQ(1, buckets.at(2)); + EXPECT_EQ(1, 
buckets.at(3)); + EXPECT_EQ(6, buckets.at(11)); +} + +TEST(DeviceClassTest, GetDeviceClass) { + EXPECT_EQ( + "Channel: /ps/CPU -> /worker/GPU", + GetDeviceClass("Channel from /job:ps/replica:0/task:0/device:CPU:0 to " + "/job:worker/replica:7/task:0/device:GPU:7")); + EXPECT_EQ( + "Channel: /worker_train/CPU -> /ps/GPU", + GetDeviceClass( + "Channel from /job:worker_train/replica:0/task:0/device:CPU:0 to " + "/job:ps/replica:7/task:0/device:GPU:7")); +} + +TEST(DeviceClassTest, GetDeviceClassForNonChannelDevice) { + EXPECT_EQ("Unclassified", + GetDeviceClassForNonChannelDevice("SOMETHING_WEIRD_DEVICE_NAME")); + EXPECT_EQ("/worker/GPU", GetDeviceClassForNonChannelDevice( + "/job:worker/replica:0/task:0/device:GPU:0")); + EXPECT_EQ("/worker/CPU", GetDeviceClassForNonChannelDevice( + "/job:worker/replica:0/task:0/device:CPU:0")); + EXPECT_EQ("/worker_train/CPU", GetDeviceClassForNonChannelDevice( + "/job:worker_train/replica:7/CPU:0")); + EXPECT_EQ("//GPU", GetDeviceClassForNonChannelDevice("/device:GPU:7")); +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 99ea75f703..1ae6fac8c8 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/tensor.pb.h" @@ -26,7 +27,9 @@ limitations under the License. 
#include "tensorflow/core/grappler/costs/utils.h" #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { @@ -51,7 +54,7 @@ Costs CombineCosts(const Costs& left, const Costs& right) { result.max_per_op_streaming = std::max(left.max_per_op_streaming, right.max_per_op_streaming); } - VLOG(3) << "costs execution_time=" << result.execution_time.count() + VLOG(4) << "costs execution_time=" << result.execution_time.count() << " max_memory=" << result.max_memory << " max_per_op_buffers=" << result.max_per_op_buffers << " max_per_op_streaming=" << result.max_per_op_streaming; @@ -544,7 +547,7 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) { auto& device_op_cost = FindOrCreateZero(op_name, &device.op_to_cost); device_op_cost = CombineCosts(device_op_cost, node_costs); - VLOG(2) << "Op scheduled -- name: " << node->name() << ", op: " << node->op() + VLOG(3) << "Op scheduled -- name: " << node->name() << ", op: " << node->op() << ", device: " << node->device() << ", ready: " << node_state.time_ready.count() << ", scheduled: " << node_state.time_scheduled.count() @@ -649,12 +652,12 @@ Costs VirtualScheduler::Summary() const { << ", execution_time = " << state.GetCurrTime().count() << ", memory usage: " << "persistenst = " - << Round2(persistent_memory_usage / 1024.0 / 1024.0 / 1024.0) - << " GB, peak = " - << Round2(state.max_memory_usage / 1024.0 / 1024.0 / 1024.0) - << " GB, total = " - << Round2(max_memory_usage / 1024.0 / 1024.0 / 1024.0) - << " GB, at the end: " << state.memory_usage << " B"; + << strings::HumanReadableNumBytes(persistent_memory_usage) + << ", peak = " + << strings::HumanReadableNumBytes(state.max_memory_usage) + << ", total = " << 
strings::HumanReadableNumBytes(max_memory_usage) + << ", at the end: " + << strings::HumanReadableNumBytes(state.memory_usage); VLOG(1) << "Per-op execution time (and memory usage at peak memory usage):"; @@ -668,16 +671,20 @@ Costs VirtualScheduler::Summary() const { for (const auto& op_cost_pair : state.op_to_cost) { const auto& op = op_cost_pair.first; const auto& cost = op_cost_pair.second.execution_time.count(); - const float mem_usage_gb = - Round2(op_to_memory[op] / 1024.0 / 1024.0 / 1024.0); - int64 op_mem_usage = op_to_memory.at(op); + int64 op_mem_usage = 0; + auto it = op_to_memory.find(op); + if (it != op_to_memory.end()) { + op_mem_usage = it->second; + } + const float mem_usage_percent = max_memory_usage > 0 ? Round2(100.0 * op_mem_usage / max_memory_usage) : 0.0; if (cost || mem_usage_percent > 1.0) { // Print out only non-zero cost ops or ops with > 1% memory usage. - VLOG(1) << " + " << op << " : " << cost << " (" << mem_usage_gb - << " GB [" << mem_usage_percent << "%] " + VLOG(1) << " + " << op << " : " << cost << " (" + << strings::HumanReadableNumBytes(op_mem_usage) << " [" + << mem_usage_percent << "%] " << (persisent_ops.count(op) > 0 ? ": persistent op)" : ")"); } } @@ -686,11 +693,13 @@ Costs VirtualScheduler::Summary() const { } } - // Also log the op description and their corresponding counts. - VLOG(2) << "Node description, counts, cost:"; - for (const auto& item : op_counts_) { - VLOG(2) << "Node: " << item.first << ", Count: " << item.second - << ", Individual Cost: " << op_costs_.at(item.first); + if (VLOG_IS_ON(2)) { + // Also log the op description and their corresponding counts. 
+ VLOG(2) << "Node description, counts, cost:"; + for (const auto& item : op_counts_) { + VLOG(2) << "Node: " << item.first << ", Count: " << item.second + << ", Individual Cost: " << op_costs_.at(item.first); + } } VLOG(1) << "Critical path execution time: " @@ -709,6 +718,7 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) { for (const auto& node_def : device.second.nodes_executed) { const NodeState& nodestate = node_map_.at(node_def); NodeExecStats* node_stats = device_stepstats->add_node_stats(); + uint64 total_output_size = 0; for (int slot = 0; slot < nodestate.output_properties.size(); slot++) { const auto& properties = nodestate.output_properties[slot]; NodeOutput* no = node_stats->add_output(); @@ -716,6 +726,14 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) { TensorDescription* tensor_descr = no->mutable_tensor_description(); tensor_descr->set_dtype(properties.dtype()); *tensor_descr->mutable_shape() = properties.shape(); + // Optional allocation description. + const auto tensor_size = + CalculateOutputSize(nodestate.output_properties, slot); + total_output_size += tensor_size; + tensor_descr->mutable_allocation_description()->set_requested_bytes( + tensor_size); + tensor_descr->mutable_allocation_description()->set_allocated_bytes( + tensor_size); } node_stats->set_timeline_label(node_def->op()); node_stats->set_node_name(node_def->name()); @@ -728,6 +746,23 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) { node_stats->set_all_end_rel_micros( nodestate.time_finished.asMicroSeconds().count() - nodestate.time_scheduled.asMicroSeconds().count()); + auto* mem_stats = node_stats->mutable_memory_stats(); + // VirtualScheduler does not specify scratch pad memory usage. 
+ mem_stats->set_host_temp_memory_size(0); + mem_stats->set_device_temp_memory_size(0); + int64 host_persistent_memory_size = 0; + int64 device_persistent_memory_size = 0; + if (IsPersistentNode(node_def)) { + if (device.first.find("cpu") != string::npos || + device.first.find("CPU") != string::npos) { + host_persistent_memory_size = total_output_size; + } else { + device_persistent_memory_size = total_output_size; + } + } + mem_stats->set_host_persistent_memory_size(host_persistent_memory_size); + mem_stats->set_device_persistent_memory_size( + device_persistent_memory_size); *device_partition_graph->mutable_node()->Add() = *node_def; } } diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h index 767b91677f..8741afff7d 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.h +++ b/tensorflow/core/grappler/costs/virtual_scheduler.h @@ -327,7 +327,7 @@ class VirtualScheduler { // Auxilliary data structures for constructing NodeState and DeviceState. GraphProperties graph_properties_; - Cluster* cluster_; // Not owned. + Cluster* cluster_; // Not owned. const GrapplerItem* grappler_item_; // Not owned. bool use_static_shapes_; diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc index 64fb626422..5656aab4b4 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc @@ -1235,7 +1235,7 @@ TEST_F(VirtualSchedulerTest, CalculateOutputSize) { EXPECT_EQ(2 * 10 * 10 * 10, scheduler_->CalculateOutputSize(output, 2)); EXPECT_EQ(4 * 100 * 7 * 8 * 99, scheduler_->CalculateOutputSize(output, 3)); - // Any uknown shape (-1) shall yield zero output size. + // Any unknown shape (-1) shall yield zero output size. 
EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 4)); EXPECT_EQ(0, scheduler_->CalculateOutputSize(output, 5)); @@ -1320,8 +1320,10 @@ TEST_F(VirtualSchedulerTest, ComplexDependency) { return std::make_pair(node_port.first->name(), node_port.second); }); std::set> expected = { - std::make_pair("bn", -1), std::make_pair("bn", 0), - std::make_pair("bn", 2), std::make_pair("x", 0), + std::make_pair("bn", -1), + std::make_pair("bn", 0), + std::make_pair("bn", 2), + std::make_pair("x", 0), }; ExpectSetEq(expected, nodes_in_memory); @@ -1512,7 +1514,6 @@ TEST_F(VirtualSchedulerTest, InterDeviceTransfer) { output_properties.push_back(output_property); } return scheduler_->CalculateOutputSize(output_properties, 0); - }; // Validate transfer size. @@ -1529,6 +1530,5 @@ TEST_F(VirtualSchedulerTest, InterDeviceTransfer) { EXPECT_EQ(get_output_size(recv_op_names[-1]), 4); EXPECT_EQ(get_output_size(send_op_names[-1]), 4); } - } // end namespace grappler } // end namespace tensorflow -- GitLab From 403e51018b3c47cd5989d6b50776e235221fade4 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 9 Oct 2017 19:12:48 -0700 Subject: [PATCH 211/909] [XLA] Factor out repeated LatestNonGteAncestorAndIndex helper. 
PiperOrigin-RevId: 171620470 --- .../compiler/xla/service/cpu/ir_emitter.cc | 18 ++-------- .../xla/service/gpu/hlo_to_ir_bindings.cc | 2 +- .../xla/service/gpu/ir_emission_utils.cc | 7 ---- .../xla/service/gpu/ir_emission_utils.h | 4 --- .../xla/service/gpu/ir_emitter_unnested.cc | 33 +++++-------------- .../compiler/xla/service/hlo_instruction.cc | 23 +++++++++++++ .../compiler/xla/service/hlo_instruction.h | 20 +++++++++++ 7 files changed, 55 insertions(+), 52 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index c9c87f065b..a58db883d3 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -2102,19 +2102,6 @@ Status IrEmitter::HandleDynamicSlice(HloInstruction* dynamic_slice, namespace { -// Returns the first non-GetTupleElement ancestor instruction of 'hlo'. -// If the first non-GTE ancestor is tuple-shaped, populates 'index' with the -// (possibly nested) tuple indices used on the path from ancestor to 'hlo'. -const HloInstruction* LatestNonGteAncestorAndIndex(const HloInstruction* hlo, - ShapeIndex* index) { - if (hlo->opcode() == HloOpcode::kGetTupleElement) { - const auto* operand = LatestNonGteAncestorAndIndex(hlo->operand(0), index); - index->push_back(hlo->tuple_index()); - return operand; - } - return hlo; -} - // Checks if we can emit code for DynamicUpdateSlice to update data in-place. // Returns true if operand 0 of DynamicUpdateSlice and its output buffer // share the same buffer allocation. @@ -2126,9 +2113,10 @@ bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment, // Walk DynamicUpdateSlice operand(0) to parameter and get its // associated operand. See if it shares an allocation with this operand. 
+ HloInstruction* operand; ShapeIndex index; - auto* operand = - LatestNonGteAncestorAndIndex(dynamic_update_slice->operand(0), &index); + std::tie(operand, index) = + dynamic_update_slice->mutable_operand(0)->LatestNonGteAncestorAndIndex(); if (operand->opcode() != HloOpcode::kParameter) { return false; } diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 373c1aa5f9..0bf66a4bc8 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -67,7 +67,7 @@ void HloToIrBindings::EmitBasePointersForHlos( // Lookup allocation GetTupleElement operand. const BufferAllocation::Slice slice = buffer_assignment_ - ->GetUniqueTopLevelSlice(LatestNonGteAncestor(non_io_hlo)) + ->GetUniqueTopLevelSlice(non_io_hlo->LatestNonGteAncestor()) .ConsumeValueOrDie(); // We are not in a nested context, so check non-thread-local allocation. CHECK(!slice.allocation()->is_thread_local()); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index 6be26dde8f..8fb7a6adda 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -214,12 +214,5 @@ llvm::Value* EmitShuffleDown(llvm::Value* value, llvm::Value* offset, value->getType()); } -const HloInstruction* LatestNonGteAncestor(const HloInstruction* hlo) { - while (hlo->opcode() == HloOpcode::kGetTupleElement) { - hlo = hlo->operand(0); - } - return hlo; -} - } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h index 422972762e..06c3205296 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h @@ -53,10 +53,6 @@ llvm::Value* 
EmitPrintf(tensorflow::StringPiece fmt, llvm::Value* EmitShuffleDown(llvm::Value* value, llvm::Value* offset, llvm::IRBuilder<>* builder); -// Resolves GetTupleElement instruction operands starting with 'hlo'. -// Returns the first ancestor instruction which is not a GetTupleElement. -const HloInstruction* LatestNonGteAncestor(const HloInstruction* hlo); - } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 4e6b109b80..88ea5760cb 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -254,27 +254,11 @@ Status IrEmitterUnnested::HandleConvolution(HloInstruction* convolution, rhs_instruction, window); } -namespace { - -// Returns the first non-GetTupleElement ancestor instruction of 'hlo'. -// If the first non-GTE ancestor is tuple-shaped, populates 'index' with the -// (possibly nested) tuple indices used on the path from ancestor to 'hlo'. -const HloInstruction* LatestNonGteAncestorAndIndex(const HloInstruction* hlo, - ShapeIndex* index) { - if (hlo->opcode() == HloOpcode::kGetTupleElement) { - const auto* operand = LatestNonGteAncestorAndIndex(hlo->operand(0), index); - index->push_back(hlo->tuple_index()); - return operand; - } - return hlo; -} - // Checks if we can emit code for DynamicUpdateSlice to update data in-place. // Returns true if operand 0 of DynamicUpdateSlice and its output buffer // share the same buffer allocation. -// Returns false otherwise. 
-bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment, - HloInstruction* fusion) { +static bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment, + HloInstruction* fusion) { CHECK_EQ(HloOpcode::kFusion, fusion->opcode()); HloInstruction* fused_root = fusion->fused_expression_root(); if (fused_root->opcode() != HloOpcode::kDynamicUpdateSlice) { @@ -282,9 +266,10 @@ bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment, } // Walk DynamicUpdateSlice operand(0) to fused parameter and get its // associated operand. See if it shares an allocation with this operand. + HloInstruction* fusion_operand; ShapeIndex index; - auto* fusion_operand = - LatestNonGteAncestorAndIndex(fused_root->operand(0), &index); + std::tie(fusion_operand, index) = + fused_root->mutable_operand(0)->LatestNonGteAncestorAndIndex(); if (fusion_operand->opcode() != HloOpcode::kParameter) { return false; } @@ -292,8 +277,6 @@ bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment, return assignment.SharesSliceAtIndex(fusion, {}, operand, index); } -} // namespace - Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { HloInstruction* root = fusion->fused_expression_root(); // HandleFusion specializes reduction from a multi-dimensional array to a 1D @@ -386,7 +369,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { TF_RETURN_IF_ERROR(root->Accept(&fused_emitter)); // Recursively lookup 'fusion_operand' for DynamicUpdateSlice operand 0. - auto* fusion_operand = LatestNonGteAncestor(root->operand(0)); + auto* fusion_operand = root->operand(0)->LatestNonGteAncestor(); CHECK_EQ(HloOpcode::kParameter, fusion_operand->opcode()); // Operand(0) the input array which shares an allocation with the output. @@ -1625,7 +1608,7 @@ llvm::Function* IrEmitterUnnested::EmitBasePointersForHloAndItsOperands( // with their operand buffer in 'io_hlos' and 'non_io_hlos' below. 
std::vector non_io_hlos; for (const HloInstruction* operand : hlo.operands()) { - const HloInstruction* to_lookup = LatestNonGteAncestor(operand); + const HloInstruction* to_lookup = operand->LatestNonGteAncestor(); if (buffer_assignment.HasTopLevelAllocation(to_lookup) && buffer_assignment.GetUniqueTopLevelSlice(to_lookup) .ConsumeValueOrDie() @@ -1665,7 +1648,7 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( std::vector io_buffers; io_buffers.reserve(io_hlos.size()); for (const HloInstruction* io_hlo : io_hlos) { - io_buffers.push_back(GetAllocationSlice(*LatestNonGteAncestor(io_hlo))); + io_buffers.push_back(GetAllocationSlice(*io_hlo->LatestNonGteAncestor())); } // Create a KernelThunk that launches the kernel that implements "inst". diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 81bccfddbb..e3e482cf85 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1131,6 +1131,29 @@ std::unique_ptr HloInstruction::CloneFusionWithNewOperands( return new_instruction; } +std::pair +HloInstruction::LatestNonGteAncestorAndIndex() const { + const HloInstruction* hlo = this; + ShapeIndex index; + while (hlo->opcode() == HloOpcode::kGetTupleElement) { + index.push_back(hlo->tuple_index()); + hlo = hlo->operand(0); + } + + // We built up index in the reverse order from what we want. 
+ std::reverse(index.begin(), index.end()); + + return {hlo, index}; +} + +const HloInstruction* HloInstruction::LatestNonGteAncestor() const { + const HloInstruction* hlo = this; + while (hlo->opcode() == HloOpcode::kGetTupleElement) { + hlo = hlo->operand(0); + } + return hlo; +} + const Literal& HloInstruction::literal() const { CHECK_EQ(HloOpcode::kConstant, opcode_); return *literal_; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 73c4ebd9f1..011cc8f742 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -508,6 +508,26 @@ class HloInstruction { // Precondition: opcode() == HloOpcode::kGetTupleElement int64 tuple_index() const; + // Returns the first non-GetTupleElement ancestor instruction of 'hlo'. + // If the first non-GTE ancestor is tuple-shaped, populates 'index' with the + // (possibly nested) tuple indices used on the path from ancestor to 'hlo'. + std::pair LatestNonGteAncestorAndIndex() + const; + + std::pair LatestNonGteAncestorAndIndex() { + auto rv = + const_cast(this)->LatestNonGteAncestorAndIndex(); + return {const_cast(rv.first), rv.second}; + } + + // Same as LatestNonGteAncestorAndIndex, but just returns the HloInstruction. + const HloInstruction* LatestNonGteAncestor() const; + + HloInstruction* LatestNonGteAncestor() { + return const_cast( + const_cast(this)->LatestNonGteAncestor()); + } + // Gets/sets the to_apply HloComputation for Call, Map, Reduce, etc. // The setter should only be called by HloModule or HloComputation methods. // -- GitLab From 84f1b9049de86ba5614ce73f91232fd72eefbd1f Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 9 Oct 2017 19:47:07 -0700 Subject: [PATCH 212/909] [XLA:LLVM] Rename ops.h to tuple_ops.h. I would like to reclaim ops.h for a different purpose in a later patch. 
It doesn't make sense to shove it all in the same header because FusedIrEmitter uses (tuple_)ops.h, but my new functions will use FusedIrEmitter. PiperOrigin-RevId: 171622776 --- tensorflow/compiler/xla/service/cpu/BUILD | 2 +- tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 2 +- tensorflow/compiler/xla/service/gpu/BUILD | 4 ++-- .../compiler/xla/service/gpu/convolution_folding.cc | 2 +- .../compiler/xla/service/gpu/hlo_to_ir_bindings.cc | 2 +- tensorflow/compiler/xla/service/gpu/ir_emitter.cc | 2 +- .../compiler/xla/service/gpu/ir_emitter_unnested.cc | 2 +- tensorflow/compiler/xla/service/llvm_ir/BUILD | 9 ++++----- .../compiler/xla/service/llvm_ir/fused_ir_emitter.cc | 2 +- tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc | 1 - .../xla/service/llvm_ir/{ops.cc => tuple_ops.cc} | 2 +- .../compiler/xla/service/llvm_ir/{ops.h => tuple_ops.h} | 8 +++++--- 12 files changed, 19 insertions(+), 19 deletions(-) rename tensorflow/compiler/xla/service/llvm_ir/{ops.cc => tuple_ops.cc} (98%) rename tensorflow/compiler/xla/service/llvm_ir/{ops.h => tuple_ops.h} (93%) diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index fa6e5b2313..0daaa122f4 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -237,7 +237,7 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter", - "//tensorflow/compiler/xla/service/llvm_ir:ops", + "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops", "//tensorflow/core:lib", "@llvm//:core", "@llvm//:support", diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index a58db883d3..5474862e45 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -48,7 +48,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" -#include "tensorflow/compiler/xla/service/llvm_ir/ops.h" +#include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 82c32407d3..1d980405dd 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -104,7 +104,7 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:alias_analysis", "//tensorflow/compiler/xla/service/llvm_ir:ir_array", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", - "//tensorflow/compiler/xla/service/llvm_ir:ops", + "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops", "//tensorflow/core:lib", "@llvm//:core", ], @@ -146,7 +146,7 @@ cc_library( "//tensorflow/compiler/xla/service/llvm_ir:llvm_loop", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", "//tensorflow/compiler/xla/service/llvm_ir:loop_emitter", - "//tensorflow/compiler/xla/service/llvm_ir:ops", + "//tensorflow/compiler/xla/service/llvm_ir:tuple_ops", "//tensorflow/core:lib", "//tensorflow/core:stream_executor_no_cuda", "@llvm//:core", diff --git a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc index 7cf5613ce5..edd04773d1 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_folding.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_folding.cc @@ -275,7 +275,7 @@ MatchBackwardInput(HloInstruction* conv) { Window new_window = old_window; for (size_t i = 0; i < spatial_dims.size(); ++i) { // Restore backward convolution's padding config from the matched pattern. 
- // See the comment in tensorflow/core/kernels/conv_grad_ops.cc + // See the comment in tensorflow/core/kernels/conv_grad_tuple_ops.cc // for how we convert backward input convolution to a variant of forward // convolution. // diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 0bf66a4bc8..152d226ab0 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" -#include "tensorflow/compiler/xla/service/llvm_ir/ops.h" +#include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index a76d217cac..3862c2190b 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -34,7 +34,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h" -#include "tensorflow/compiler/xla/service/llvm_ir/ops.h" +#include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h" #include "tensorflow/compiler/xla/service/name_uniquer.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 88ea5760cb..cf41623a9b 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -50,7 +50,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" -#include "tensorflow/compiler/xla/service/llvm_ir/ops.h" +#include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h" #include "tensorflow/compiler/xla/service/name_uniquer.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index f498f95057..62e404bd82 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -93,7 +93,6 @@ cc_library( deps = [ ":ir_array", ":llvm_loop", - ":ops", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", @@ -112,7 +111,7 @@ cc_library( ":ir_array", ":llvm_util", ":loop_emitter", - ":ops", + ":tuple_ops", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", @@ -125,9 +124,9 @@ cc_library( ) cc_library( - 
name = "ops", - srcs = ["ops.cc"], - hdrs = ["ops.h"], + name = "tuple_ops", + srcs = ["tuple_ops.cc"], + hdrs = ["tuple_ops.h"], deps = [ ":ir_array", ":llvm_util", diff --git a/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.cc b/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.cc index 7d1fad753e..d286c49d68 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.cc @@ -22,7 +22,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/elemental_ir_emitter.h" #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" -#include "tensorflow/compiler/xla/service/llvm_ir/ops.h" +#include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/util.h" diff --git a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc index 8bba1776d1..6fa4cd08c9 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc @@ -19,7 +19,6 @@ limitations under the License. 
#include #include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h" -#include "tensorflow/compiler/xla/service/llvm_ir/ops.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" diff --git a/tensorflow/compiler/xla/service/llvm_ir/ops.cc b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc similarity index 98% rename from tensorflow/compiler/xla/service/llvm_ir/ops.cc rename to tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc index ae5c666b7d..6051cbfc6f 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ops.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/llvm_ir/ops.h" +#include "tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h" #include #include diff --git a/tensorflow/compiler/xla/service/llvm_ir/ops.h b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h similarity index 93% rename from tensorflow/compiler/xla/service/llvm_ir/ops.h rename to tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h index 4e1d9d1080..a75cdc8158 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ops.h +++ b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_OPS_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_OPS_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_TUPLE_OPS_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_TUPLE_OPS_H_ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Value.h" @@ -22,6 +22,8 @@ limitations under the License. 
#include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/types.h" +// Utilities for emitting LLVM IR related to HLO tuples. + namespace xla { namespace llvm_ir { @@ -76,4 +78,4 @@ llvm::Value* EmitGetTupleElement(const Shape& target_shape, int64 index, } // namespace llvm_ir } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_OPS_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_TUPLE_OPS_H_ -- GitLab From d98519bf80c3a7fc26b41139bf3e753510efffb2 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 9 Oct 2017 20:22:07 -0700 Subject: [PATCH 213/909] [XLA:CPU] Let the elementwise concat op handle being emitted into a degenerate BB. It's possible to create a graph such that an elementwise concat is emitted into an LLVM basic block which lacks a terminator. In this case it's an error to call splitBasicBlock(), so we need to handle this (as is done elsewhere in this file). PiperOrigin-RevId: 171624976 --- .../xla/service/elemental_ir_emitter.cc | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 12fb88f39c..3a8f70a8ef 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -879,17 +879,31 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( const int64 concat_dim = hlo->dimensions(0); auto source_index = target_index; + llvm::BasicBlock* init_block = ir_builder_->GetInsertBlock(); + + // A terminator should be present iff we're emitting code + // into the middle (as opposed to the end) of a basic block. 
+ CHECK_EQ(ir_builder_->GetInsertPoint() == init_block->end(), + init_block->getTerminator() == nullptr); + + llvm::BasicBlock* exit_block; + if (ir_builder_->GetInsertPoint() == init_block->end()) { + exit_block = llvm_ir::CreateBasicBlock( + /*insert_before=*/nullptr, IrName(hlo, "merge"), ir_builder_); + } else { + exit_block = init_block->splitBasicBlock( + ir_builder_->GetInsertPoint(), AsStringRef(IrName(hlo, "merge"))); + init_block->getTerminator()->eraseFromParent(); + } + + llvm_ir::SetToFirstInsertPoint(exit_block, ir_builder_); llvm::PHINode* output = ir_builder_->CreatePHI( llvm_ir::PrimitiveTypeToIrType(hlo->shape().element_type(), ir_builder_), hlo->operands().size()); - llvm::BasicBlock* init_block = ir_builder_->GetInsertBlock(); auto prior_insert_point = ir_builder_->GetInsertPoint(); - llvm::BasicBlock* exit_block = - init_block->splitBasicBlock(output, "concat_merge"); ir_builder_->SetInsertPoint(init_block); - init_block->getTerminator()->eraseFromParent(); for (int64 operand_idx = 0; operand_idx < hlo->operand_count(); ++operand_idx) { -- GitLab From 4f102ffd12d56a2c41dc8b5a5324873ecc0f07e4 Mon Sep 17 00:00:00 2001 From: Igor Ganichev Date: Mon, 9 Oct 2017 20:34:06 -0700 Subject: [PATCH 214/909] Cache last zero tensor in eager gradient computation SPINN and probably other models commonly split large tensors into many equal parts (e.g. along the batch dimension). When we compute the gradient of such a split, we often don't have gradients coming from all parts and end up creating zero tensors. This change caches the last created zero tensor and reuses it. It reduces SPINN training time by over 13%.
PiperOrigin-RevId: 171625608 --- tensorflow/python/eager/imperative_grad.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/imperative_grad.py b/tensorflow/python/eager/imperative_grad.py index ab6eb87a07..f388d0a148 100644 --- a/tensorflow/python/eager/imperative_grad.py +++ b/tensorflow/python/eager/imperative_grad.py @@ -171,14 +171,23 @@ def imperative_grad( op = ready_ops.pop() op_trace = op_to_entry.pop(op) out_gradients = [gradients.pop(t, None) for t in op_trace.output_ids] + + # Cache the last used zero tensor. We reuse it if the next one + # we need is of the same shape and dtype. This is very helpful in + # large splits and should have negligible overhead in other cases. + last_shape_and_dtype = None + last_zeros = None for i in range(len(out_gradients)): if out_gradients[i] is None: # TODO(apassos) this should be in the right device none_indices = _grad_fn_accepts_none_for_indices.get( op_trace.op_type, None) if none_indices is None or i not in none_indices: - out_gradients[i] = vspace.zeros( - *op_trace.output_shape_and_dtype[i]) + shape_and_dtype = op_trace.output_shape_and_dtype[i] + if shape_and_dtype != last_shape_and_dtype: + last_shape_and_dtype = shape_and_dtype + last_zeros = vspace.zeros(*shape_and_dtype) + out_gradients[i] = last_zeros else: out_gradients[i] = vspace.aggregate_fn(out_gradients[i]) -- GitLab From effb22e8a44763901ee2cf55c30290f0b1edb570 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 9 Oct 2017 20:41:00 -0700 Subject: [PATCH 215/909] Use an external constant pool to reduce LLVM compile times LLVM does not deal well with huge arrays emitted inline into the IR. In JIT mode, this change teaches XLA to emit large constant tensors onto a side data structure, which are then symbolically linked to the generated executable. 
It is important to note that this works only in JIT mode, and my current understanding is that making this work reliably in AOT will be somewhat more difficult. PiperOrigin-RevId: 171626043 --- tensorflow/compiler/xla/service/cpu/BUILD | 25 ++++++ .../compiler/xla/service/cpu/cpu_compiler.cc | 9 +- .../xla/service/cpu/external_constant_pool.cc | 53 ++++++++++++ .../xla/service/cpu/external_constant_pool.h | 64 +++++++++++++++ .../cpu/external_constant_pool_test.cc | 82 +++++++++++++++++++ .../compiler/xla/service/cpu/ir_emitter.cc | 49 ++++++++--- .../compiler/xla/service/cpu/ir_emitter.h | 10 ++- .../xla/service/cpu/simple_orc_jit.cc | 19 ++++- .../compiler/xla/service/cpu/simple_orc_jit.h | 6 ++ 9 files changed, 299 insertions(+), 18 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/external_constant_pool.cc create mode 100644 tensorflow/compiler/xla/service/cpu/external_constant_pool.h create mode 100644 tensorflow/compiler/xla/service/cpu/external_constant_pool_test.cc diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 0daaa122f4..7933e226bf 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -48,6 +48,29 @@ cc_library( alwayslink = True, # Contains per-platform transfer manager registration ) +cc_library( + name = "external_constant_pool", + srcs = ["external_constant_pool.cc"], + hdrs = ["external_constant_pool.h"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "external_constant_pool_test", + srcs = ["external_constant_pool_test.cc"], + deps = [ + ":external_constant_pool", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + ], +) + cc_library( name = "cpu_compiler", srcs = ["cpu_compiler.cc"], @@ -130,6 +153,7 
@@ cc_library( ":cpu_runtime_neon", ":cpu_runtime_sse4_1", ":disassembler", + ":external_constant_pool", ":runtime_conv2d", ":runtime_matmul", ":runtime_single_threaded_conv2d", @@ -217,6 +241,7 @@ cc_library( ":cpu_options", ":cpu_runtime", ":dot_op_emitter", + ":external_constant_pool", ":ir_emission_utils", ":simple_orc_jit", "//tensorflow/compiler/xla:shape_util", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 2ad3578969..d0e366de57 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -522,7 +522,8 @@ StatusOr> CpuCompiler::Compile( } IrEmitter ir_emitter(*module, *assignment, llvm_module.get(), - &hlo_to_profile_idx, jit->target_machine()); + &hlo_to_profile_idx, jit->target_machine(), + jit->external_constant_pool()); std::unique_ptr> function_names( new std::map()); @@ -602,7 +603,8 @@ StatusOr> CpuCompiler::Compile( // GetEmbeddedComputations guarantees that a called computation occurs // before a caller computation. 
IrEmitter ir_emitter(*module, *assignment, llvm_module.get(), - &hlo_to_profile_idx, jit->target_machine()); + &hlo_to_profile_idx, jit->target_machine(), + jit->external_constant_pool()); for (auto embedded_computation : computation->MakeEmbeddedComputationsList()) { @@ -771,7 +773,8 @@ CpuCompiler::CompileAheadOfTime(std::vector> modules, } IrEmitter ir_emitter(*module, *assignment, &llvm_module, - /*hlo_to_profile_idx=*/nullptr, target_machine.get()); + /*hlo_to_profile_idx=*/nullptr, target_machine.get(), + /*external_constant_pool=*/nullptr); HloComputation* computation = module->entry_computation(); for (auto embedded_computation : computation->MakeEmbeddedComputationsList()) { diff --git a/tensorflow/compiler/xla/service/cpu/external_constant_pool.cc b/tensorflow/compiler/xla/service/cpu/external_constant_pool.cc new file mode 100644 index 0000000000..c9f8e55849 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/external_constant_pool.cc @@ -0,0 +1,53 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/external_constant_pool.h" + +#include +#include +#include + +#include "tensorflow/compiler/xla/map_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/core/lib/gtl/flatset.h" + +namespace xla { +namespace cpu { +void ExternalConstantPool::Insert(string name, const Literal& literal, + int64 alignment) { + CHECK(!ShapeUtil::IsTuple(literal.shape())); + CHECK(alignment > 0 && IsPowerOfTwo(static_cast(alignment))); + CHECK(entries_.find(name) == entries_.end()); + + int64 literal_size = ShapeUtil::ByteSizeOf(literal.shape()); + void* raw_pointer; + CHECK_EQ( + posix_memalign(&raw_pointer, std::max(alignment, sizeof(void*)), + literal_size), + 0) + << "failed to allocate " << literal_size << " bytes with alignment of " + << alignment; + + std::memcpy(raw_pointer, literal.InternalData(), literal_size); + entries_.emplace(std::move(name), static_cast(raw_pointer)); +} + +const uint8* ExternalConstantPool::Find(const string& name) { + auto it = entries_.find(name); + return it == entries_.end() ? nullptr : it->second.get(); +} +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/external_constant_pool.h b/tensorflow/compiler/xla/service/cpu/external_constant_pool.h new file mode 100644 index 0000000000..ade28cbcbc --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/external_constant_pool.h @@ -0,0 +1,64 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_EXTERNAL_CONSTANT_POOL_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_EXTERNAL_CONSTANT_POOL_H_ + +#include + +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/core/lib/gtl/flatmap.h" + +namespace xla { +namespace cpu { +// An ExternalConstantPool maintains a set of constants kept external to +// generated LLVM IR. These constants are accessed from the IR via globals with +// extern linkage. This current incarnation of ExternalConstantPool only +// supports the JIT CPU backend; the AOT backend is not supported. +// +// Implementation-wise, this is a simple wrapper around a map of strings to byte +// buffers. This simply implementation works in a JIT scenario. This class +// will have to become smarter if we decide to support external constant pools +// on AOT compiles in the future. +class ExternalConstantPool { + public: + // Inserts a buffer with the contents of `literal` into the constant pool with + // the name `name`. It is an error to try to insert two constants with the + // same `name` into the same constant pool. The buffer for literal is aligned + // to `aligment` bytes, and `alignment` must be a power of 2. + // + // The constant pool copies out the contents of `literal` into a buffer it + // owns -- it does not keep pointers to `literal`, or to memory owned by + // `literal`. 
+ void Insert(string name, const Literal& literal, int64 alignment); + + // Find the constant with name `name` in this constant pool. If there isn't + // such constant, return nullptr. + const uint8* Find(const string& name); + + private: + // We need to `free()` pointers allocated into `entries_` since we allocate + // them with `posix_memalign`. + struct FreeDeleter { + void operator()(void* ptr) { free(ptr); } + }; + + tensorflow::gtl::FlatMap> + entries_; +}; +} // namespace cpu +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_EXTERNAL_CONSTANT_POOL_H_ diff --git a/tensorflow/compiler/xla/service/cpu/external_constant_pool_test.cc b/tensorflow/compiler/xla/service/cpu/external_constant_pool_test.cc new file mode 100644 index 0000000000..9290a4e5df --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/external_constant_pool_test.cc @@ -0,0 +1,82 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/external_constant_pool.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace cpu { +namespace { +class ExternalConstantPoolTest : public ::testing::Test {}; + +template +T GetFromBuffer(const uint8* buffer, int64 index) { + T result; + std::memcpy(&result, buffer + index * sizeof(T), sizeof(T)); + return result; +} + +TEST(ExternalConstantPoolTest, Basic) { + ExternalConstantPool constant_pool; + EXPECT_EQ(constant_pool.Find("name-0"), nullptr); + const auto literal = Literal::CreateR2({{1, 2}, {3, 4}}); + constant_pool.Insert("name-0", *literal, 4); + const uint8* constant = constant_pool.Find("name-0"); + ASSERT_NE(constant, nullptr); + + EXPECT_EQ(GetFromBuffer(constant, 0), 1); + EXPECT_EQ(GetFromBuffer(constant, 1), 2); + EXPECT_EQ(GetFromBuffer(constant, 2), 3); + EXPECT_EQ(GetFromBuffer(constant, 3), 4); + + EXPECT_EQ(constant_pool.Find("name-1"), nullptr); +} + +TEST(ExternalConstantPoolTest, RowMinorLayout) { + ExternalConstantPool constant_pool; + EXPECT_EQ(constant_pool.Find("name-0"), nullptr); + const auto literal = Literal::CreateR2WithLayout( + {{1, 2}, {3, 4}}, LayoutUtil::MakeLayout({0, 1})); + constant_pool.Insert("name-0", *literal, 4); + const uint8* constant = constant_pool.Find("name-0"); + ASSERT_NE(constant, nullptr); + + EXPECT_EQ(GetFromBuffer(constant, 0), 1); + EXPECT_EQ(GetFromBuffer(constant, 1), 3); + EXPECT_EQ(GetFromBuffer(constant, 2), 2); + EXPECT_EQ(GetFromBuffer(constant, 3), 4); +} + +TEST(ExternalConstantPoolTest, Alignment) { + ExternalConstantPool constant_pool; + EXPECT_EQ(constant_pool.Find("name-0"), nullptr); + + for (int i = 0; i < 8; i++) { + int64 alignment = 1 << i; + string name = tensorflow::strings::StrCat("name-", i); + + const auto literal = Literal::CreateR2({{1, 2}, {3, 4}}); + constant_pool.Insert(name, 
*literal, alignment); + + const uint8* constant = constant_pool.Find(name); + ASSERT_NE(constant, nullptr); + EXPECT_EQ(reinterpret_cast(constant) % alignment, 0); + } +} + +} // namespace +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 5474862e45..89a911d070 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -75,7 +75,8 @@ IrEmitter::IrEmitter( const HloModule& hlo_module, const BufferAssignment& assignment, llvm::Module* llvm_module, const std::unordered_map* hlo_to_profile_idx, - llvm::TargetMachine* target_machine) + llvm::TargetMachine* target_machine, + ExternalConstantPool* external_constant_pool) : assignment_(assignment), module_(llvm_module), arch_type_(llvm::Triple(llvm_module->getTargetTriple()).getArch()), @@ -86,7 +87,8 @@ IrEmitter::IrEmitter( parallel_cpu_backend_( options::CpuParallelBackendRequested(hlo_module_config_)), is_top_level_computation_(false), - target_machine_features_(target_machine) { + target_machine_features_(target_machine), + external_constant_pool_(external_constant_pool) { ir_builder_.setFastMathFlags(llvm_ir::GetFastMathFlags( /*fast_math_enabled=*/hlo_module_config_.debug_options() .xla_enable_fast_math())); @@ -272,16 +274,39 @@ Status IrEmitter::HandleBitcast(HloInstruction* bitcast) { Status IrEmitter::HandleConstant(HloInstruction* constant, const Literal& literal) { VLOG(2) << "HandleConstant: " << constant->ToString(); - llvm::Constant* initializer = - llvm_ir::ConvertLiteralToIrConstant(literal, &ir_builder_); - llvm::GlobalVariable* global_for_const = new llvm::GlobalVariable( - /*Module=*/*module_, - /*Type=*/initializer->getType(), - /*isConstant=*/true, - /*Linkage=*/llvm::GlobalValue::PrivateLinkage, - /*Initializer=*/initializer, - /*Name=*/""); - global_for_const->setAlignment(MinimumAlignmentForShape(literal.shape())); + 
llvm::GlobalVariable* global_for_const; + + // We avoid creating large constants in the LLVM IR since LLVM is not + // efficient for large constant arrays. We still emit "small enough" constant + // arrays into the Ir, in the off chance the LLVM optimizer can do something + // interesting with it. + const int kMaxInternalConstantSizeInBytes = 128; + if (external_constant_pool_ && + ByteSizeOf(literal.shape()) >= kMaxInternalConstantSizeInBytes) { + string global_name = tensorflow::strings::StrCat( + "constant_global_", external_global_constant_counter_++); + global_for_const = new llvm::GlobalVariable( + /*Module=*/*module_, + /*Type=*/IrShapeType(literal.shape()), + /*isConstant=*/true, + /*Linkage=*/llvm::GlobalValue::ExternalLinkage, + /*Initializer=*/nullptr, + /*Name=*/AsStringRef(global_name)); + global_for_const->setAlignment(MinimumAlignmentForShape(literal.shape())); + external_constant_pool_->Insert(global_name, literal, + MinimumAlignmentForShape(literal.shape())); + } else { + llvm::Constant* initializer = + llvm_ir::ConvertLiteralToIrConstant(literal, &ir_builder_); + global_for_const = new llvm::GlobalVariable( + /*Module=*/*module_, + /*Type=*/initializer->getType(), + /*isConstant=*/true, + /*Linkage=*/llvm::GlobalValue::PrivateLinkage, + /*Initializer=*/initializer, + /*Name=*/""); + global_for_const->setAlignment(MinimumAlignmentForShape(literal.shape())); + } emitted_value_[constant] = global_for_const; VLOG(2) << " emitted value: " << llvm_ir::DumpToString(*global_for_const); VLOG(2) << " its type: " diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index b15026b6da..ba02f5f778 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -29,6 +29,7 @@ limitations under the License. 
#include "llvm/IR/Value.h" #include "llvm/Target/TargetMachine.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" +#include "tensorflow/compiler/xla/service/cpu/external_constant_pool.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -104,11 +105,15 @@ class IrEmitter : public DfsHloVisitorWithDefault { // llvm_module: the LLVM module to emit IR into. // hlo_to_profile_idx: the mapping from HLO to its index in the profiling // array. + // external_constant_pool: if non-null, points to an ExternalConstantPool + // instance into which the Ir emitter can spill + // constants. IrEmitter(const HloModule& hlo_module, const BufferAssignment& assignment, llvm::Module* llvm_module, const std::unordered_map* hlo_to_profile_idx, - llvm::TargetMachine* target_machine); + llvm::TargetMachine* target_machine, + ExternalConstantPool* external_constant_pool); ~IrEmitter() override; // Emit and return the given HLO computation as an LLVM IR @@ -601,6 +606,9 @@ class IrEmitter : public DfsHloVisitorWithDefault { TargetMachineFeatures target_machine_features_; + int64 external_global_constant_counter_ = 0; + ExternalConstantPool* external_constant_pool_; + TF_DISALLOW_COPY_AND_ASSIGN(IrEmitter); }; diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index c3c11df090..c614e334a8 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -117,8 +117,20 @@ const JITSymbolTable& GetJITSymbolTable() { } // A simple SymbolResolver that delegates to the host dynamic linker. 
-struct SimpleResolver : public llvm::JITSymbolResolver { +class SimpleResolver : public llvm::JITSymbolResolver { + public: + explicit SimpleResolver(ExternalConstantPool* external_constant_pool) + : external_constant_pool_(external_constant_pool) {} + llvm::JITSymbol findSymbol(const std::string& name) override { + string name_as_string(name); + if (const uint8* from_constant_pool = + external_constant_pool_->Find(string(name))) { + return llvm::JITEvaluatedSymbol( + reinterpret_cast(from_constant_pool), + llvm::JITSymbolFlags::None); + } + std::string canonical_name = CanonicalizeSymbol(name); const JITSymbolTable& jit_symbol_table = GetJITSymbolTable(); @@ -136,6 +148,9 @@ struct SimpleResolver : public llvm::JITSymbolResolver { llvm::JITSymbol findSymbolInLogicalDylib(const std::string& name) override { return nullptr; } + + private: + ExternalConstantPool* external_constant_pool_; }; llvm::SmallVector DetectMachineAttributes() { @@ -205,7 +220,7 @@ SimpleOrcJIT::SimpleOrcJIT(const llvm::TargetOptions& target_options, SimpleOrcJIT::ModuleHandleT SimpleOrcJIT::AddModule( std::unique_ptr module) { auto handle = cantFail(compile_layer_.addModule( - std::move(module), MakeUnique())); + std::move(module), MakeUnique(external_constant_pool()))); module_handles_.push_back(handle); return handle; } diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h index e476c0e381..ded01e9e4d 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.h @@ -27,6 +27,7 @@ limitations under the License. 
#include "llvm/Target/TargetMachine.h" #include "tensorflow/compiler/xla/service/cpu/compiler_functor.h" #include "tensorflow/compiler/xla/service/cpu/disassembler.h" +#include "tensorflow/compiler/xla/service/cpu/external_constant_pool.h" #include "tensorflow/compiler/xla/types.h" namespace xla { @@ -90,6 +91,10 @@ class SimpleOrcJIT { llvm::TargetMachine* target_machine() const { return target_machine_.get(); } + ExternalConstantPool* external_constant_pool() { + return &external_constant_pool_; + } + private: std::vector module_handles_; std::unique_ptr target_machine_; @@ -97,6 +102,7 @@ class SimpleOrcJIT { const llvm::DataLayout data_layout_; ObjLayerT object_layer_; CompileLayerT compile_layer_; + ExternalConstantPool external_constant_pool_; }; } // namespace cpu -- GitLab From 1be36dd6d675998842824f69285f146b95615042 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 9 Oct 2017 21:01:13 -0700 Subject: [PATCH 216/909] [TF:XLA] Re-enable strided slice tests that now pass. PiperOrigin-RevId: 171627028 --- tensorflow/compiler/tests/BUILD | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index c8269b3d5b..eded6dc463 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -208,11 +208,6 @@ tf_xla_py_test( name = "slice_ops_test", size = "small", srcs = ["slice_ops_test.py"], - # TODO(b/62962492): Test fails with assertion error. - tags = [ - "manual", - "notap", - ], deps = [ ":xla_test", "//tensorflow/python:array_ops", -- GitLab From 90f257e0fc12e54d96d1e8a2afd374d1a2723577 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 9 Oct 2017 21:28:14 -0700 Subject: [PATCH 217/909] Fix ReshapeMover bug with reshaped constants; add HloVerifiedTestBase. 
An example of a bad ReshapeMover rewrite: BEFORE %reshape.1 = f32[1,1,128] reshape(f32[1,128] %dot) %constant = f32[128] constant({...}) %reshape.2 = f32[1,1,128] reshape(f32[128] %constant) %add = f32[1,1,128] add(f32[1,1,128] %reshape.1, f32[1,1,128] %reshape.2) AFTER %constant = f32[128] constant({...}) %add = f32[1,128] add(f32[1,128] %dot, f32[128] %constant) %reshape = f32[1,1,128] reshape(f32[1,128] %add) The problem in AFTER is the add now contains an implicit broadcast. One way to fix this is to re-shape the %constant to f32[1,128] before the %add. Instead of that, the fix introduced in this CL is to simply prevent the ReshapeMover from moving the reshapes in this case. A comment in reshape_mover.cc describes the complexities that led to this choice. Also added HloVerifiedTestBase, which keeps track of a default HloModule, and automatically runs HloVerifier at the end of every test. This is useful for many HLO tests; the tests of various passes can probably all use this. Three existing issues in reshape_mover_test.cc were found and fixed as a result. 
PiperOrigin-RevId: 171628656 --- tensorflow/compiler/xla/service/BUILD | 2 +- .../compiler/xla/service/cpu/cpu_compiler.cc | 2 + .../compiler/xla/service/reshape_mover.cc | 275 +++++++++--------- .../compiler/xla/service/reshape_mover.h | 2 +- .../xla/service/reshape_mover_test.cc | 124 +++++--- tensorflow/compiler/xla/tests/BUILD | 16 + .../xla/tests/hlo_verified_test_base.cc | 69 +++++ .../xla/tests/hlo_verified_test_base.h | 63 ++++ 8 files changed, 371 insertions(+), 182 deletions(-) create mode 100644 tensorflow/compiler/xla/tests/hlo_verified_test_base.cc create mode 100644 tensorflow/compiler/xla/tests/hlo_verified_test_base.h diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 4b28467725..0c20a05714 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1118,7 +1118,7 @@ tf_cc_test( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:hlo_verified_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", ], diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index d0e366de57..386800d221 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -269,6 +269,8 @@ Status CpuCompiler::RunHloPasses(HloModule* module) { { auto& pass = pipeline.AddPass>("simplification"); + pass.AddInvariantChecker(ShapeSizeBytesFunction()); + pass.AddPass( /*rewrite_training_op=*/true, /*rewrite_inference_op=*/true, diff --git a/tensorflow/compiler/xla/service/reshape_mover.cc b/tensorflow/compiler/xla/service/reshape_mover.cc index 404fd3e6d7..0fb90230f2 100644 --- a/tensorflow/compiler/xla/service/reshape_mover.cc +++ b/tensorflow/compiler/xla/service/reshape_mover.cc @@ -48,23 
+48,28 @@ namespace xla { namespace { -// Checks if an instruction can change its shape simply by adjusting metadata. -// This is the case if it is: -// -// - an instruction does not have any producers like Constants -// or Rng instruction, or is a scalar. -// -// Or -// -// - an reshape/transpose instruction with an operand that can trivially change -// its shape. -bool InstructionCanTriviallyChangeShape(const HloInstruction* instruction) { - // Reshape/Transposes are only trivial if their operand is trivial. - if (instruction->opcode() == HloOpcode::kReshape || - instruction->opcode() == HloOpcode::kTranspose) { - CHECK_EQ(instruction->operand_count(), 1); - return InstructionCanTriviallyChangeShape(instruction->operand(0)); - } +bool IsReshapeOrTranspose(const HloInstruction* instruction) { + return instruction->opcode() == HloOpcode::kReshape || + instruction->opcode() == HloOpcode::kTranspose; +} + +// Returns true iff `instruction` can change its shape simply by adjusting +// metadata. +bool CanTriviallyChangeShape(const HloInstruction* instruction) { + // NOTE: Technically a sequence of reshape(reshape(constant)) is also + // trivially reshapable, so we might be tempted to simply recurse if + // IsReshapeOrTranspose(instruction)==true. + // + // But it's not that simple. E.g. reshape(reshape(rng)) is only trivially + // reshapable if *all* instructions in the chain have user_count == 1. And + // reshape(scalar) isn't trivial at all if the reshape itself isn't scalar; we + // rely on implicit scalar broadcast for scalars to be trivial. In addition, + // these cases make it harder to maintain correctness of the UpdateOperand + // logic below. + // + // So don't handle these chains, unless you update the tests and code to deal + // with these properly. One idea is to add a pass immediately beforehand that + // collapses trivial runs of reshapes / transposes. // Scalars can operate with any shape. 
if (ShapeUtil::IsScalar(instruction->shape())) { @@ -93,9 +98,8 @@ HloInstruction* FirstNonScalarAndNonTrivialReshapeOperand( const HloInstruction* hlo) { for (HloInstruction* operand : hlo->operands()) { if (!ShapeUtil::IsScalar(operand->shape()) && - ((operand->opcode() == HloOpcode::kReshape || - operand->opcode() == HloOpcode::kTranspose) && - !InstructionCanTriviallyChangeShape(operand->operand(0)))) { + IsReshapeOrTranspose(operand) && + !CanTriviallyChangeShape(operand->operand(0))) { VLOG(5) << "Found first non-scalar and non-trivial reshape operand of " << hlo->ToStringNoMetadata() << ":\n\t" << operand->ToStringNoMetadata(); @@ -122,28 +126,15 @@ bool AreEquivalentReshapes(const HloInstruction* a, const HloInstruction* b) { } } -// Returns true if an elementwise operation has all operands that can easily -// change shape. Operands can easily change shape if they are all -// reshapes/transposes to and from the same shape. Additionally, operands like -// constant, rng, and any scalar change shape with only an adjustment of -// metadata. -bool IsElementwiseOfEquivalentReshapesOrTransposes( - const HloInstruction* instruction) { - const auto& operands = instruction->operands(); - HloInstruction* first_reshape_operand = - FirstNonScalarAndNonTrivialReshapeOperand(instruction); - // If there are no non-trivial reshapes or transposes, then there is nothing - // to sink below the elementwise operation. - if (!first_reshape_operand) { - return false; - } - VLOG(3) << "** Checking whether instruction is an elementwise operation of " - "equivalent reshapes/transposes: " +// Returns true if all operands of `instruction` can easily change shape. +// Operands can easily change shape if they are all reshapes/transposes to and +// from the same shape. Additionally, operands like constant, rng, and any +// scalar change shape with only an adjustment of metadata. 
+bool AllOperandsHaveEasyShapeChanges( + const HloInstruction* instruction, + const HloInstruction* first_reshape_operand) { + VLOG(3) << "** Checking whether all operands have easy shape changes: " << instruction->ToStringNoMetadata(); - bool result = (instruction->user_count() > 0 || - instruction == instruction->parent()->root_instruction()) && - instruction->IsElementwise() && !operands.empty(); - // Check whether all operands: // 0. Have the same dimensions as the output -- if not, it may be // implicitly broadcast, which can confound the movement's @@ -155,66 +146,117 @@ bool IsElementwiseOfEquivalentReshapesOrTransposes( // or // 2. Are one of kConstant, kRng, and scalars that can change shape // trivially, - if (result) { - for (auto& operand : operands) { - if (!ShapeUtil::SameDimensions(operand->shape(), instruction->shape())) { - VLOG(5) << "Operand shape differs from output shape; may be " - "implicitly broadcast, so preventing " - "movement\n\toperand: " - << operand->ToStringNoMetadata() - << "\n\tinstruction: " << instruction->ToStringNoMetadata(); - result = false; - break; - } - - if (AreEquivalentReshapes(first_reshape_operand, operand)) { - VLOG(5) << "Are equivalent reshapes:\n\tfirst_reshape_operand: " - << first_reshape_operand->ToStringNoMetadata() - << "\n\toperand: " << operand->ToStringNoMetadata(); - continue; - } + for (const HloInstruction* operand : instruction->operands()) { + if (!ShapeUtil::SameDimensions(operand->shape(), instruction->shape())) { + VLOG(5) << "Operand shape differs from output shape; may be " + "implicitly broadcast, so preventing " + "movement\n\toperand: " + << operand->ToStringNoMetadata() + << "\n\tinstruction: " << instruction->ToStringNoMetadata(); + return false; + } - if (InstructionCanTriviallyChangeShape(operand)) { - VLOG(5) << "Operand can trivially change shape: " - << operand->ToStringNoMetadata(); - continue; - } + if (AreEquivalentReshapes(first_reshape_operand, operand)) { + VLOG(5) << "Are 
equivalent reshapes:\n\tfirst_reshape_operand: " + << first_reshape_operand->ToStringNoMetadata() + << "\n\toperand: " << operand->ToStringNoMetadata(); + continue; + } - // TODO(someone): Look into supporting general ops for the operands as - // well. - VLOG(5) << "Operand is neither equalivant to the first Reshape operand" - "nor can trivially change shape: " + if (CanTriviallyChangeShape(operand)) { + VLOG(5) << "Operand can trivially change shape: " << operand->ToStringNoMetadata(); - result = false; - break; + continue; } + + // TODO(someone): Look into supporting general ops for the operands as + // well. + VLOG(5) << "Operand is neither equalivant to the first Reshape operand" + "nor can trivially change shape: " + << operand->ToStringNoMetadata(); + return false; } - VLOG(3) << "ElementwiseOfEquivalentReshapesOrTransposes result for " - << instruction->ToStringNoMetadata() << ": " << result; - return result; + VLOG(3) << "All operands have easy shape changes: " + << instruction->ToStringNoMetadata(); + return true; +} + +// This function is called once we've decided to sink reshape/transpose operands +// across an instruction. It returns an updated `operand` with a shape that +// plays nicely with `new_operand_shape`; either it has the same shape (of the +// correct type), or it is a scalar that may be implicitly broadcast. 
+HloInstruction* UpdateOperand(HloComputation* computation, + const HloInstruction* first_reshape_operand, + const Shape& new_operand_shape, + HloInstruction* operand) { + const PrimitiveType element_type = operand->shape().element_type(); + const Shape new_shape = + ShapeUtil::ChangeElementType(new_operand_shape, element_type); + + switch (operand->opcode()) { + case HloOpcode::kConstant: { + if (first_reshape_operand->opcode() == HloOpcode::kReshape) { + VLOG(5) << "Adding reshape to kConstant operand"; + return computation->AddInstruction( + HloInstruction::CreateReshape(new_shape, operand)); + } else { + CHECK(first_reshape_operand->opcode() == HloOpcode::kTranspose); + VLOG(5) << "Adding transpose to kConstant operand"; + std::vector inverse_permutation = + InversePermutation(first_reshape_operand->dimensions()); + return computation->AddInstruction(HloInstruction::CreateTranspose( + new_shape, operand, inverse_permutation)); + } + } + case HloOpcode::kRng: { + CHECK_EQ(operand->user_count(), 1); + VLOG(5) << "Cloning kRng operand with new shape"; + return computation->AddInstruction( + operand->CloneWithNewOperands(new_shape, operand->operands())); + } + case HloOpcode::kReshape: + case HloOpcode::kTranspose: { + VLOG(5) << "Using existing operand of kReshape or kTranspose"; + return operand->mutable_operand(0); + } + default: + LOG(FATAL) << "Unexpected operand opcode during update: " << operand; + } } // Try to sink any reshape or transpose operands of `instruction` across it. We // do so if `instruction` is elementwise and all operands are either equivalent -// reshapes/transposes or are trivially reshapable. Note that no move is -// performend if there is no nontrivial reshapes/transposes. +// reshapes/transposes or are trivially reshapable. 
StatusOr TrySinkReshapeOrTranspose(HloComputation* computation, HloInstruction* instruction) { - if (!IsElementwiseOfEquivalentReshapesOrTransposes(instruction)) { + // Only perform sinks for live elementwise instructions with operands. + const bool is_dead = instruction->user_count() == 0 && + instruction != computation->root_instruction(); + if (!instruction->IsElementwise() || instruction->operands().empty() || + is_dead) { return false; } - HloInstruction* old_reshape = + // Only perform sinks if there are any nontrivial reshape/transpose operands. + const HloInstruction* first_reshape_operand = FirstNonScalarAndNonTrivialReshapeOperand(instruction); - TF_RET_CHECK(old_reshape != nullptr); - Shape new_elementwise_shape = old_reshape->operand(0)->shape(); + if (!first_reshape_operand) { + return false; + } + + // Only perform sinks if all operands can easily change shape. + if (!AllOperandsHaveEasyShapeChanges(instruction, first_reshape_operand)) { + return false; + } - VLOG(3) << "** Trying to sink reshape or transpose: " - << instruction->ToStringNoMetadata() - << "\n\told reshape: " << old_reshape->ToStringNoMetadata() - << "\n\tnew elementwise shape: " - << ShapeUtil::HumanString(new_elementwise_shape); + // At this point we've decided to sink reshape/transpose operands. 
+ const Shape& new_operand_shape = first_reshape_operand->operand(0)->shape(); + VLOG(3) << "** Sinking reshape or transpose: " + << instruction->ToStringNoMetadata() << "\n\tfirst reshape operand: " + << first_reshape_operand->ToStringNoMetadata() + << "\n\tnew operand shape: " + << ShapeUtil::HumanString(new_operand_shape); auto operands = instruction->operands(); for (size_t i = 0; i < operands.size(); ++i) { @@ -224,55 +266,19 @@ StatusOr TrySinkReshapeOrTranspose(HloComputation* computation, if (ShapeUtil::IsScalar(operands[i]->shape())) { continue; } - PrimitiveType element_type = operands[i]->shape().element_type(); - switch (operands[i]->opcode()) { - case HloOpcode::kConstant: { - if (old_reshape->opcode() == HloOpcode::kReshape) { - VLOG(3) << "Creating reshape for kConstant operand " << i << ": " - << operands[i]->ToStringNoMetadata(); - operands[i] = instruction->parent()->AddInstruction( - HloInstruction::CreateReshape( - ShapeUtil::ChangeElementType(new_elementwise_shape, - element_type), - operands[i])); - } else { - TF_RET_CHECK(old_reshape->opcode() == HloOpcode::kTranspose); - std::vector inverse_permutation = - InversePermutation(old_reshape->dimensions()); - operands[i] = instruction->parent()->AddInstruction( - HloInstruction::CreateTranspose( - ShapeUtil::ChangeElementType(new_elementwise_shape, - element_type), - operands[i], inverse_permutation)); - } - break; - } - case HloOpcode::kRng: { - CHECK_EQ(operands[i]->user_count(), 1); - operands[i] = instruction->parent()->AddInstruction( - operands[i]->CloneWithNewOperands( - ShapeUtil::ChangeElementType(new_elementwise_shape, - element_type), - operands[i]->operands())); - break; - } - case HloOpcode::kReshape: - case HloOpcode::kTranspose: - operands[i] = operands[i]->mutable_operand(0); - break; - default: - LOG(FATAL) << "Unexpected opcode while trying to sink reshapes or " - "transposes."; - } + VLOG(3) << "Updating operand #" << i << ": " + << operands[i]->ToStringNoMetadata(); + 
operands[i] = UpdateOperand(computation, first_reshape_operand, + new_operand_shape, operands[i]); } if (HloOpcode::kFusion == instruction->opcode()) { // Here we already know `instruction` is elementwise, and no operand is - // implicit broadcast as if it were the operands would not be equivalent - // reshapes, so all the fused instructions have the same dimensions. + // implicit broadcast as if it were the operands would not have easy shape + // changes, so all the fused instructions have the same dimensions. for (const auto& fused_instruction : instruction->fused_instructions()) { Shape* shape = fused_instruction->mutable_shape(); - *shape->mutable_dimensions() = new_elementwise_shape.dimensions(); - *shape->mutable_layout() = new_elementwise_shape.layout(); + *shape->mutable_dimensions() = new_operand_shape.dimensions(); + *shape->mutable_layout() = new_operand_shape.layout(); } } HloInstruction* new_elementwise = @@ -284,12 +290,12 @@ StatusOr TrySinkReshapeOrTranspose(HloComputation* computation, // // In this case, convert' should have the same element type as // `convert` and the same dimensions as operands[0]. 
- ShapeUtil::ChangeElementType(new_elementwise_shape, + ShapeUtil::ChangeElementType(new_operand_shape, instruction->shape().element_type()), operands)); std::unique_ptr new_reshape; - switch (old_reshape->opcode()) { + switch (first_reshape_operand->opcode()) { case HloOpcode::kReshape: VLOG(3) << "Creating new reshape for new elementwise op: " << new_elementwise->ToStringNoMetadata(); @@ -297,8 +303,9 @@ StatusOr TrySinkReshapeOrTranspose(HloComputation* computation, HloInstruction::CreateReshape(instruction->shape(), new_elementwise); break; case HloOpcode::kTranspose: - new_reshape = HloInstruction::CreateTranspose( - instruction->shape(), new_elementwise, old_reshape->dimensions()); + new_reshape = + HloInstruction::CreateTranspose(instruction->shape(), new_elementwise, + first_reshape_operand->dimensions()); break; default: LOG(FATAL) << "Bad opcode"; @@ -312,6 +319,8 @@ StatusOr TrySinkReshapeOrTranspose(HloComputation* computation, StatusOr ReshapeMover::Run(HloModule* module) { bool changed = false; + VLOG(2) << "Pre ReshapeMover HLO:"; + XLA_VLOG_LINES(2, module->ToString()); for (auto* comp : module->MakeNonfusionComputations()) { for (HloInstruction* instruction : comp->MakeInstructionPostOrder()) { TF_ASSIGN_OR_RETURN(bool did_change, @@ -319,6 +328,8 @@ StatusOr ReshapeMover::Run(HloModule* module) { changed |= did_change; } } + VLOG(2) << "Post ReshapeMover HLO:"; + XLA_VLOG_LINES(2, module->ToString()); return changed; } diff --git a/tensorflow/compiler/xla/service/reshape_mover.h b/tensorflow/compiler/xla/service/reshape_mover.h index b7e0a46939..1f59e3b314 100644 --- a/tensorflow/compiler/xla/service/reshape_mover.h +++ b/tensorflow/compiler/xla/service/reshape_mover.h @@ -26,7 +26,7 @@ namespace xla { // them inputward also. 
class ReshapeMover : public HloPassInterface { public: - tensorflow::StringPiece name() const override { return "reshape-motion"; } + tensorflow::StringPiece name() const override { return "reshape-mover"; } StatusOr Run(HloModule* module) override; }; diff --git a/tensorflow/compiler/xla/service/reshape_mover_test.cc b/tensorflow/compiler/xla/service/reshape_mover_test.cc index a81d3f4eb3..aac8638a54 100644 --- a/tensorflow/compiler/xla/service/reshape_mover_test.cc +++ b/tensorflow/compiler/xla/service/reshape_mover_test.cc @@ -25,7 +25,7 @@ limitations under the License. #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -34,7 +34,7 @@ namespace op = xla::testing::opcode_matchers; namespace xla { namespace { -using ReshapeMoverTest = HloTestBase; +using ReshapeMoverTest = HloVerifiedTestBase; TEST_F(ReshapeMoverTest, ReshapesWithDifferentInputShapesNotMoved) { HloComputation::Builder builder(TestName()); @@ -50,13 +50,12 @@ TEST_F(ReshapeMoverTest, ReshapesWithDifferentInputShapesNotMoved) { builder.AddInstruction(HloInstruction::CreateBinary( root_shape, HloOpcode::kAdd, reshape0, reshape1)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT(computation->root_instruction(), op::Add(op::Reshape(param0), op::Reshape(param1))); - EXPECT_FALSE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_FALSE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Add(op::Reshape(param0), op::Reshape(param1))); @@ -89,13 +88,12 @@ 
TEST_F(ReshapeMoverTest, 1ConstantAnd1ReshapesOnRngNotMoved) { builder.AddInstruction(HloInstruction::CreateBinary( root_shape, HloOpcode::kAdd, reshape0, const1)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT(computation->root_instruction(), op::Add(op::Reshape(rng0), const1)); - EXPECT_FALSE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_FALSE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Add(op::Reshape(rng0), const1)); @@ -115,13 +113,12 @@ TEST_F(ReshapeMoverTest, ScalarReshapesNotMoved) { builder.AddInstruction(HloInstruction::CreateBinary( root_shape, HloOpcode::kAdd, reshape0, reshape1)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT(computation->root_instruction(), op::Add(op::Reshape(param0), op::Reshape(param1))); - EXPECT_FALSE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_FALSE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT( computation->root_instruction(), @@ -142,12 +139,11 @@ TEST_F(ReshapeMoverTest, EquivalentReshapesMoved) { builder.AddInstruction(HloInstruction::CreateBinary( root_shape, HloOpcode::kAdd, reshape0, reshape1)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT(computation->root_instruction(), op::Add(op::Reshape(param0), op::Reshape(param1))); - EXPECT_TRUE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_TRUE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Reshape(op::Add(param0, param1))); @@ -193,21 +189,19 @@ TEST_F(ReshapeMoverTest, 1ConstantAnd2ReshapesMoved) { 
builder.AddInstruction(HloInstruction::CreateReshape(root_shape, param2)); builder.AddInstruction(HloInstruction::CreateTernary( - ShapeUtil::MakeShape(PRED, {2, 3}), HloOpcode::kSelect, const0, reshape1, - reshape2)); + root_shape, HloOpcode::kSelect, const0, reshape1, reshape2)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT(computation->root_instruction(), op::Select(const0, reshape1, reshape2)); - EXPECT_TRUE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_TRUE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Reshape(op::Select(op::Reshape(const0), param1, param2))); - EXPECT_EQ(const0->shape().DebugString(), + EXPECT_EQ(root_shape.DebugString(), computation->root_instruction()->shape().DebugString()); } @@ -228,17 +222,16 @@ TEST_F(ReshapeMoverTest, 1ParameterAnd1ReshapeNotMoved) { 0, ShapeUtil::MakeShape(F32, {1, 8, 1, 7}), "param0")); auto reshape0 = builder.AddInstruction(HloInstruction::CreateReshape(root_shape, param0)); - auto param1 = builder.AddInstruction(HloInstruction::CreateParameter( - 1, ShapeUtil::MakeShape(F32, {1, 8, 1, 7}), "param1")); + auto param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, root_shape, "param1")); builder.AddInstruction(HloInstruction::CreateBinary( root_shape, HloOpcode::kAdd, reshape0, param1)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT(computation->root_instruction(), op::Add(op::Reshape(param0), param1)); - EXPECT_FALSE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_FALSE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Add(op::Reshape(param0), param1)); @@ -260,7 +253,7 @@ TEST_F(ReshapeMoverTest, 
1ParameterAnd1ReshapeNotMoved) { // trivial reshapes. TEST_F(ReshapeMoverTest, 2TrivialConstantReshapeNotMoved) { HloComputation::Builder builder(TestName()); - auto root_shape = ShapeUtil::MakeShape(F32, {2, 3}); + auto root_shape = ShapeUtil::MakeShape(F32, {3, 2}); auto const0 = builder.AddInstruction(HloInstruction::CreateConstant( Literal::CreateR2({{1, 2, 3}, {4, 5, 6}}))); auto reshape0 = @@ -272,18 +265,17 @@ TEST_F(ReshapeMoverTest, 2TrivialConstantReshapeNotMoved) { builder.AddInstruction(HloInstruction::CreateReshape(root_shape, const1)); auto pred = builder.AddInstruction(HloInstruction::CreateParameter( - 0, ShapeUtil::MakeShape(PRED, {1, 3, 1, 2}), "pred")); + 0, ShapeUtil::MakeShape(PRED, {3, 2}), "pred")); builder.AddInstruction(HloInstruction::CreateTernary( root_shape, HloOpcode::kSelect, pred, reshape0, reshape1)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT(computation->root_instruction(), op::Select(pred, op::Reshape(const0), op::Reshape(const1))); - EXPECT_FALSE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_FALSE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Select(pred, op::Reshape(const0), op::Reshape(const1))); @@ -323,13 +315,12 @@ TEST_F(ReshapeMoverTest, 1NonTrivialReshapeMoved) { builder.AddInstruction(HloInstruction::CreateBinary( root_shape, HloOpcode::kAdd, reshape0, const1)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT(computation->root_instruction(), op::Add(op::Reshape(param0), const1)); - EXPECT_TRUE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_TRUE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Reshape(op::Add(param0, op::Reshape(const1)))); 
@@ -337,6 +328,48 @@ TEST_F(ReshapeMoverTest, 1NonTrivialReshapeMoved) { computation->root_instruction()->shape().DebugString()); } +// For a graph that looks like: +// +// +- reshape0 - param0 (shape A) +// | +// +- reshape1 - const1 (shape B) +// | +// add +// +// There is 1 non-trivial reshape (reshape0). It's not clear whether reshape1 +// should be trivial or not; conceptually it's trivial, but handling it would +// complicate the rest of our logic. +// +// For now we treat it as non-trivial, so we verify that we don't sink the +// reshapes in this case. +TEST_F(ReshapeMoverTest, 1NonTrivialReshapeWith1ReshapedConstNotMoved) { + HloComputation::Builder builder(TestName()); + auto root_shape = ShapeUtil::MakeShape(F32, {1, 1, 3}); + auto param0 = builder.AddInstruction(HloInstruction::CreateParameter( + 0, ShapeUtil::MakeShape(F32, {1, 3}), "param0")); + auto const1 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR1({9, 8, 7}))); + auto reshape0 = + builder.AddInstruction(HloInstruction::CreateReshape(root_shape, param0)); + auto reshape1 = + builder.AddInstruction(HloInstruction::CreateReshape(root_shape, const1)); + + builder.AddInstruction(HloInstruction::CreateBinary( + root_shape, HloOpcode::kAdd, reshape0, reshape1)); + + auto computation = module().AddEntryComputation(builder.Build()); + + EXPECT_THAT(computation->root_instruction(), + op::Add(op::Reshape(param0), op::Reshape(const1))); + + EXPECT_FALSE(ReshapeMover().Run(&module()).ValueOrDie()); + + EXPECT_THAT(computation->root_instruction(), + op::Add(op::Reshape(param0), op::Reshape(const1))); + EXPECT_EQ(root_shape.DebugString(), + computation->root_instruction()->shape().DebugString()); +} + TEST_F(ReshapeMoverTest, EquivalentReshapesMovedAcrossFusion) { HloComputation::Builder builder(TestName()); auto root_shape = ShapeUtil::MakeShape(F32, {8, 7}); @@ -351,15 +384,14 @@ TEST_F(ReshapeMoverTest, EquivalentReshapesMovedAcrossFusion) { auto add = 
builder.AddInstruction(HloInstruction::CreateBinary( root_shape, HloOpcode::kAdd, reshape0, reshape1)); - HloModule module(TestName()); - auto computation = module.AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); computation->CreateFusionInstruction({add}, HloInstruction::FusionKind::kLoop); EXPECT_THAT(computation->root_instruction(), op::Fusion(op::Reshape(param0), op::Reshape(param1))); - EXPECT_TRUE(ReshapeMover().Run(&module).ValueOrDie()); + EXPECT_TRUE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Reshape(op::Fusion(param0, param1))); @@ -386,14 +418,13 @@ TEST_F(ReshapeMoverTest, EquivalentReshapesMovedAcrossSelect) { builder.AddInstruction(HloInstruction::CreateTernary( root_shape, HloOpcode::kSelect, reshape_pred, reshape0, reshape1)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT( computation->root_instruction(), op::Select(op::Reshape(pred), op::Reshape(param0), op::Reshape(param1))); - EXPECT_TRUE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_TRUE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Reshape(op::Select(pred, param0, param1))); @@ -416,12 +447,11 @@ TEST_F(ReshapeMoverTest, ScalarReshapeNotMovedAcrossSelect) { auto select = builder.AddInstruction(HloInstruction::CreateTernary( root_shape, HloOpcode::kSelect, reshape_pred, param0, param1)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT(computation->root_instruction(), op::Select(op::Reshape(pred), param0, param1)); - EXPECT_FALSE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_FALSE(ReshapeMover().Run(&module()).ValueOrDie()); 
EXPECT_THAT(computation->root_instruction(), op::Select(op::Reshape(pred), param0, param1)); @@ -468,12 +498,11 @@ TEST_F(ReshapeMoverTest, ImplicitlyBroadcastReshapeIsNotMovedBug37787999) { auto multiply = builder.AddInstruction(HloInstruction::CreateBinary( constant->shape(), HloOpcode::kMultiply, constant, reshape)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT(computation->root_instruction(), op::Multiply(op::Constant(), op::Reshape(param0))); - EXPECT_FALSE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_FALSE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT(computation->root_instruction(), op::Multiply(op::Constant(), op::Reshape(param0))); @@ -517,15 +546,14 @@ TEST_F(ReshapeMoverTest, MultiplePasses) { builder.AddInstruction(HloInstruction::CreateBinary(shape3, HloOpcode::kAdd, reshape2, reshape3)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); + auto computation = module().AddEntryComputation(builder.Build()); EXPECT_THAT( computation->root_instruction(), op::Add(op::Reshape(param2), op::Reshape(op::Add(op::Reshape(param0), op::Reshape(param1))))); - EXPECT_TRUE(ReshapeMover().Run(module.get()).ValueOrDie()); + EXPECT_TRUE(ReshapeMover().Run(&module()).ValueOrDie()); EXPECT_THAT( computation->root_instruction(), diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index e45b839afd..f37a331a72 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -127,6 +127,22 @@ cc_library( ], ) +cc_library( + name = "hlo_verified_test_base", + testonly = True, + srcs = ["hlo_verified_test_base.cc"], + hdrs = ["hlo_verified_test_base.h"], + deps = [ + ":hlo_test_base", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + 
"//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_verifier", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + tf_cc_binary( name = "local_client_aot_test_helper", srcs = ["local_client_aot_test_helper.cc"], diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc new file mode 100644 index 0000000000..31060b9e80 --- /dev/null +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc @@ -0,0 +1,69 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" + +#include "tensorflow/compiler/xla/service/hlo_verifier.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { + +/*static*/ int64 HloVerifiedTestBase::DefaultShapeSize(const Shape& shape) { + constexpr int64 kPointerSize = sizeof(void*); + if (ShapeUtil::IsOpaque(shape)) { + return kPointerSize; + } + return ShapeUtil::ByteSizeOf(shape, kPointerSize); +} + +HloVerifiedTestBase::HloVerifiedTestBase() : shape_size_fn_(DefaultShapeSize) {} + +HloVerifiedTestBase::~HloVerifiedTestBase() { + // We can't call the ASSERT or EXPECT test macros in destructors, so we + // perform HLO verification in TearDown, and use the CHECK here to ensure + // users don't accidentally override the verification. 
+ CHECK(tear_down_called_) + << "TearDown was never called; subclasses of HloVerifiedTestBase that " + << "override TearDown must call the superclass TearDown."; +} + +void HloVerifiedTestBase::TearDown() { + EXPECT_FALSE(tear_down_called_) + << "TearDown called more than once; it should be called exactly once."; + tear_down_called_ = true; + if (module_) { + HloVerifier verifier(shape_size_fn_); + xla::StatusOr mutated = verifier.Run(module_.get()); + if (!mutated.ok()) { + ADD_FAILURE() << "HloVerifier failed: " << mutated.status(); + } else { + EXPECT_FALSE(mutated.ValueOrDie()) + << "HloVerifier should never mutate the HloModule"; + } + } + HloTestBase::TearDown(); +} + +HloModule& HloVerifiedTestBase::module() { + if (!module_) { + module_ = CreateNewModule(); + } + return *module_; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h new file mode 100644 index 0000000000..b3d6b5af3b --- /dev/null +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h @@ -0,0 +1,63 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_TESTS_HLO_VERIFIED_TEST_BASE_H_ +#define TENSORFLOW_COMPILER_XLA_TESTS_HLO_VERIFIED_TEST_BASE_H_ + +#include +#include +#include + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" + +namespace xla { + +// A base class for HLO tests that stores a default HloModule, and automatically +// performs verification on that module on tear-down. +class HloVerifiedTestBase : public HloTestBase { + public: + // Returns the size in bytes of the given shape, using a default pointer size. + static int64 DefaultShapeSize(const Shape& shape); + + protected: + HloVerifiedTestBase(); + ~HloVerifiedTestBase() override; + + // Performs verification on the default HloModule returned by module(). + // Automatically called by the testing framework for each test. + // + // REQUIRED: subclasses that override TearDown() must call this explicitly. + void TearDown() override; + + // Returns the default HloModule, lazily creating it if necessary via + // HloTestBase::CreateNewModule(). + HloModule& module(); + + // Sets the shape-size function used during hlo verification. If this isn't + // called, DefaultShapeSize is used instead. + void SetShapeSizeFn(std::function shape_size_fn) { + shape_size_fn_ = std::move(shape_size_fn); + } + + private: + std::unique_ptr module_; // Lazily populated. Access via module(). 
+ std::function shape_size_fn_; + bool tear_down_called_ = false; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_TESTS_HLO_VERIFIED_TEST_BASE_H_ -- GitLab From 4b6eacbcdb8ca5182f83eee89edad24d87420b8e Mon Sep 17 00:00:00 2001 From: Taehoon Lee Date: Tue, 10 Oct 2017 22:27:05 +0900 Subject: [PATCH 218/909] Fix typos --- tensorflow/c/c_api.h | 2 +- .../contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py | 6 +++--- tensorflow/contrib/mpi_collectives/__init__.py | 2 +- tensorflow/core/graph/graph.h | 2 +- tensorflow/core/grappler/optimizers/model_pruner.cc | 2 +- tensorflow/core/profiler/README.md | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index db94828e1a..7c31b04ed1 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -1144,7 +1144,7 @@ TF_CAPI_EXPORT extern TF_Function* TF_FunctionImportFunctionDef( const void* proto, size_t proto_len, TF_Status* status); // Sets function attribute named `attr_name` to value stored in `proto`. -// If this attribute is already set to another value, it is overriden. +// If this attribute is already set to another value, it is overridden. // `proto` should point to a sequence of bytes of length `proto_len` // representing a binary serialization of an AttrValue protocol // buffer. diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py index 9e627bcaf4..1ce8954bb0 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_test.py @@ -385,7 +385,7 @@ class CudnnRNNTestSaveRestore(TensorFlowTestCase): reset_op = state_ops.assign( opaque_params, array_ops.zeros(array_ops.shape(opaque_params), dtype=dtype)) - # Passing graph explictly, otherwise an old sess would be reused. + # Passing graph explicitly, otherwise an old sess would be reused. 
with self.test_session(use_gpu=True, graph=g) as sess: sess.run(variables.global_variables_initializer()) val = saver.save(sess, save_path) @@ -436,7 +436,7 @@ class CudnnRNNTestSaveRestore(TensorFlowTestCase): save_path = os.path.join(self.get_temp_dir(), "save-restore-variable-test2") saver = saver_lib.Saver() - # Passing graph explictly, otherwise an old sess would be reused. + # Passing graph explicitly, otherwise an old sess would be reused. with self.test_session(use_gpu=True, graph=g) as sess: sess.run(variables.global_variables_initializer()) val = saver.save(sess, save_path) @@ -484,7 +484,7 @@ class CudnnRNNTestSaveRestore(TensorFlowTestCase): array_ops.zeros( array_ops.shape(rnn.trainable_variables[0]), dtype=dtype)) - # Passing graph explictly, otherwise an old sess would be reused. + # Passing graph explicitly, otherwise an old sess would be reused. with self.test_session(use_gpu=True, graph=g) as sess: sess.run(variables.global_variables_initializer()) inputs, initial_state = model.SynthesizeInput(seq_length, batch_size) diff --git a/tensorflow/contrib/mpi_collectives/__init__.py b/tensorflow/contrib/mpi_collectives/__init__.py index b94f7b0a35..9ed16a6f07 100644 --- a/tensorflow/contrib/mpi_collectives/__init__.py +++ b/tensorflow/contrib/mpi_collectives/__init__.py @@ -194,7 +194,7 @@ class DistributedOptimizer(tf.train.Optimizer): See Optimizer.compute_gradients() for more info. - In DistributedOptimizer, compute_gradients() is overriden to also + In DistributedOptimizer, compute_gradients() is overridden to also allreduce the gradients before returning them. 
""" gradients = (super(DistributedOptimizer, self) diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 5a31a6216b..418ce63bcb 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -631,7 +631,7 @@ class Graph { std::unordered_map device_names_map_; // All the while contexts owned by this graph, keyed by frame name, - // corresonding to all the while loops contained in this graph (including + // corresponding to all the while loops contained in this graph (including // nested loops). The stored contexts are usually accessed via // AddWhileContext() or Node::while_ctx(), but this manages the lifetime. std::map while_ctxs_; diff --git a/tensorflow/core/grappler/optimizers/model_pruner.cc b/tensorflow/core/grappler/optimizers/model_pruner.cc index e087621c3b..b9df196f83 100644 --- a/tensorflow/core/grappler/optimizers/model_pruner.cc +++ b/tensorflow/core/grappler/optimizers/model_pruner.cc @@ -104,7 +104,7 @@ Status ModelPruner::Optimize(Cluster* cluster, const GrapplerItem& item, // - Don't remove nodes that receive reference values, as those can be // converting references to non-references. It is important to preserve // these non-references since the partitioner will avoid sending - // non-references accross partitions more than once. + // non-references across partitions more than once. if (!rewriter.DrivesControlDependency(node) && !rewriter.IsDrivenByControlDependency(node) && !rewriter.IsConnectedToFunction(node) && diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md index 92bce9c1ce..8ca26fa5dc 100644 --- a/tensorflow/core/profiler/README.md +++ b/tensorflow/core/profiler/README.md @@ -48,7 +48,7 @@ bazel-bin/tensorflow/python/profiler/profiler_ui \ # Create options to profile the time and memory information. 
builder = tf.profiler.ProfileOptionBuilder opts = builder(builder.time_and_memory()).order_by('micros').build() -# Create a profiling context, set contructor argument `trace_steps`, +# Create a profiling context, set constructor argument `trace_steps`, # `dump_steps` to empty for explicit control. with tf.contrib.tfprof.ProfileContext('/tmp/train_dir', trace_steps=[], -- GitLab From 5a26d1ede506825455d1199267d88caeba7d206a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 07:02:27 -0700 Subject: [PATCH 219/909] Minor cleanup (remove unused inclusions, NULL => nullptr) PiperOrigin-RevId: 171672655 --- tensorflow/contrib/boosted_trees/kernels/model_ops.cc | 1 - tensorflow/core/kernels/cuda_solvers.cc | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/model_ops.cc b/tensorflow/contrib/boosted_trees/kernels/model_ops.cc index d63be3d041..4b5d5ba0de 100644 --- a/tensorflow/contrib/boosted_trees/kernels/model_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/model_ops.cc @@ -15,7 +15,6 @@ #include #include "tensorflow/contrib/boosted_trees/lib/utils/tensor_utils.h" -#include "tensorflow/contrib/boosted_trees/proto/tree_config.pb.h" #include "tensorflow/contrib/boosted_trees/resources/decision_tree_ensemble_resource.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/kernels/cuda_solvers.cc index 6c12a0e218..a83671a471 100644 --- a/tensorflow/core/kernels/cuda_solvers.cc +++ b/tensorflow/core/kernels/cuda_solvers.cc @@ -617,10 +617,11 @@ static inline Status GesvdImpl( // kernel on the stream, it is not a big performance hit. mutex_lock lock(handle_map_mutex); /* Launch the solver kernel. 
*/ - TF_RETURN_IF_CUSOLVER_ERROR(solver( - cusolver_dn_handle, jobu, jobvt, m, n, CUDAComplex(A), lda, S, - CUDAComplex(U), ldu, CUDAComplex(VT), ldvt, - CUDAComplex(dev_workspace.mutable_data()), lwork, NULL, dev_lapack_info)); + TF_RETURN_IF_CUSOLVER_ERROR(solver(cusolver_dn_handle, jobu, jobvt, m, n, + CUDAComplex(A), lda, S, CUDAComplex(U), + ldu, CUDAComplex(VT), ldvt, + CUDAComplex(dev_workspace.mutable_data()), + lwork, nullptr, dev_lapack_info)); return Status::OK(); } -- GitLab From 3bafe0a86f67dd54197c6d60bdb5053f510de7d8 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Tue, 10 Oct 2017 08:36:23 -0700 Subject: [PATCH 220/909] Add uint32 and uint64 types to TensorFlow. This change merely creates the types, but does not register kernels that act on uint32/uint64 values. It also does not alter most op registration lists to include uint32/uint64 values. If desirable, that can be done in a subsequent change, although binary size will likely prove problematic if adding more kernels. The intent of the change is so XLA-compiled code can make use uint32/uint64 types. Since XLA does not use traditional TensorFlow kernels, using uint32/uint64 operators from XLA will require only uint32/uint64 op registrations, but will require few new kernel registrations. 
PiperOrigin-RevId: 171681867 --- tensorflow/c/c_api.h | 2 ++ tensorflow/compiler/tf2xla/type_util.cc | 6 ++++ tensorflow/compiler/tf2xla/xla_op_registry.h | 13 ++++--- .../python/learn/learn_io/data_feeder_test.py | 20 +++++------ .../core/framework/op_def_builder_test.cc | 13 +++---- tensorflow/core/framework/register_types.h | 6 ++++ tensorflow/core/framework/tensor.cc | 23 +++++++++++++ tensorflow/core/framework/tensor.proto | 6 ++++ tensorflow/core/framework/types.cc | 34 +++++++++++++++---- tensorflow/core/framework/types.h | 2 ++ tensorflow/core/framework/types.proto | 16 ++++++--- tensorflow/go/tensor.go | 4 +++ tensorflow/python/__init__.py | 2 ++ tensorflow/python/framework/dtypes.py | 20 +++++++++++ tensorflow/python/framework/dtypes_test.py | 3 ++ tensorflow/python/framework/function.py | 2 ++ tensorflow/python/lib/core/ndarray_tensor.cc | 6 ++++ .../python/lib/core/ndarray_tensor_bridge.cc | 6 ++++ tensorflow/tools/api/golden/tensorflow.pbtxt | 8 +++++ 19 files changed, 159 insertions(+), 33 deletions(-) diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index db94828e1a..68a758498d 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -118,6 +118,8 @@ typedef enum TF_DataType { TF_HALF = 19, TF_RESOURCE = 20, TF_VARIANT = 21, + TF_UINT32 = 22, + TF_UINT64 = 23, } TF_DataType; // TF_DataTypeSize returns the sizeof() for the underlying type corresponding diff --git a/tensorflow/compiler/tf2xla/type_util.cc b/tensorflow/compiler/tf2xla/type_util.cc index b54848f342..c698488776 100644 --- a/tensorflow/compiler/tf2xla/type_util.cc +++ b/tensorflow/compiler/tf2xla/type_util.cc @@ -43,6 +43,12 @@ Status DataTypeToPrimitiveType(DataType data_type, xla::PrimitiveType* type) { case tensorflow::DT_UINT16: *type = xla::U16; return Status::OK(); + case tensorflow::DT_UINT32: + *type = xla::U32; + return Status::OK(); + case tensorflow::DT_UINT64: + *type = xla::U64; + return Status::OK(); case tensorflow::DT_HALF: *type = xla::F16; return 
Status::OK(); diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h index 1a8d03757a..2144868646 100644 --- a/tensorflow/compiler/tf2xla/xla_op_registry.h +++ b/tensorflow/compiler/tf2xla/xla_op_registry.h @@ -45,17 +45,16 @@ extern const char* const DEVICE_GPU_XLA_JIT; // "GPU_XLA_JIT" extern const char* const DEVICE_XLA_CPU; extern const char* const DEVICE_XLA_GPU; -constexpr std::array kIntTypes = {{DT_INT32, DT_INT64}}; constexpr std::array kFloatTypes = { {DT_HALF, DT_FLOAT, DT_DOUBLE}}; -constexpr std::array kNumericTypes = { - {DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE}}; +constexpr std::array kNumericTypes = { + {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE}}; -constexpr std::array kCpuAllTypes = { - {DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, DT_BOOL}}; +constexpr std::array kCpuAllTypes = { + {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, DT_BOOL}}; -constexpr std::array kGpuAllTypes = { - {DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, DT_BOOL}}; +constexpr std::array kGpuAllTypes = { + {DT_UINT32, DT_UINT64, DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, DT_BOOL}}; // Class that manages registrations of operators and devices for the XLA JIT. // Not thread-safe. 
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py index eaf6ae4ed7..82848be7df 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py @@ -42,16 +42,6 @@ class DataFeederTest(test.TestCase): with self.assertRaisesRegexp(TypeError, 'annot convert'): data_feeder.DataFeeder(input_data, None, n_classes=0, batch_size=1) - def test_input_uint32(self): - data = np.matrix([[1, 2], [3, 4]], dtype=np.uint32) - self._assert_raises(data) - self._assert_raises(self._wrap_dict(data)) - - def test_input_uint64(self): - data = np.matrix([[1, 2], [3, 4]], dtype=np.uint64) - self._assert_raises(data) - self._assert_raises(self._wrap_dict(data)) - def _assert_dtype(self, expected_np_dtype, expected_tf_dtype, input_data): feeder = data_feeder.DataFeeder(input_data, None, n_classes=0, batch_size=1) if isinstance(input_data, dict): @@ -87,6 +77,16 @@ class DataFeederTest(test.TestCase): self._assert_dtype(np.int64, dtypes.int64, data) self._assert_dtype(np.int64, dtypes.int64, self._wrap_dict(data)) + def test_input_uint32(self): + data = np.matrix([[1, 2], [3, 4]], dtype=np.uint32) + self._assert_dtype(np.uint32, dtypes.uint32, data) + self._assert_dtype(np.uint32, dtypes.uint32, self._wrap_dict(data)) + + def test_input_uint64(self): + data = np.matrix([[1, 2], [3, 4]], dtype=np.uint64) + self._assert_dtype(np.uint64, dtypes.uint64, data) + self._assert_dtype(np.uint64, dtypes.uint64, self._wrap_dict(data)) + def test_input_uint8(self): data = np.matrix([[1, 2], [3, 4]], dtype=np.uint8) self._assert_dtype(np.uint8, dtypes.uint8, data) diff --git a/tensorflow/core/framework/op_def_builder_test.cc b/tensorflow/core/framework/op_def_builder_test.cc index efedb221e7..c1511ebe34 100644 --- a/tensorflow/core/framework/op_def_builder_test.cc +++ b/tensorflow/core/framework/op_def_builder_test.cc @@ 
-124,21 +124,22 @@ TEST_F(OpDefBuilderTest, AttrWithRestrictions) { "attr: { name: 'a' type: 'type' allowed_values { list { type: " "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, " "DT_UINT16, DT_INT8, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, " - "DT_QINT32] } } }"); + "DT_QINT32, DT_UINT32, DT_UINT64] } } }"); ExpectSuccess( b().Attr("a:{numbertype, variant}"), "attr: { name: 'a' type: 'type' allowed_values { list { type: " "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, " "DT_UINT16, DT_INT8, DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, " - "DT_QINT32, DT_VARIANT] } } }"); + "DT_QINT32, DT_UINT32, DT_UINT64, DT_VARIANT] } } }"); ExpectSuccess(b().Attr("a:realnumbertype"), "attr: { name: 'a' type: 'type' allowed_values { list { type: " "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, " - "DT_INT16, DT_UINT16, DT_INT8] } } }"); + "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64] } } }"); ExpectSuccess(b().Attr("a:{realnumbertype, variant , string, }"), "attr: { name: 'a' type: 'type' allowed_values { list { type: " "[DT_HALF, DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, " - "DT_INT16, DT_UINT16, DT_INT8, DT_VARIANT, DT_STRING] } } }"); + "DT_INT16, DT_UINT16, DT_INT8, DT_UINT32, DT_UINT64, " + "DT_VARIANT, DT_STRING] } } }"); ExpectSuccess(b().Attr("a:quantizedtype"), "attr: { name: 'a' type: 'type' allowed_values { list { type: " "[DT_QINT8, DT_QUINT8, DT_QINT32, DT_QINT16, DT_QUINT16]} } }"); @@ -215,12 +216,12 @@ TEST_F(OpDefBuilderTest, AttrListOfRestricted) { b().Attr("a:list(realnumbertype)"), "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: " "[DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, " - "DT_UINT16, DT_INT8, DT_HALF] } } }"); + "DT_UINT16, DT_INT8, DT_HALF, DT_UINT32, DT_UINT64] } } }"); ExpectSuccess( b().Attr("a:list({realnumbertype, variant})"), "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: " "[DT_FLOAT, 
DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, DT_INT16, " - "DT_UINT16, DT_INT8, DT_HALF, DT_VARIANT] } } }"); + "DT_UINT16, DT_INT8, DT_HALF, DT_UINT32, DT_UINT64, DT_VARIANT] } } }"); ExpectSuccess( b().Attr("a:list(quantizedtype)"), "attr: { name: 'a' type: 'list(type)' allowed_values { list { type: " diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h index 030c00cb8e..3f9c307d03 100644 --- a/tensorflow/core/framework/register_types.h +++ b/tensorflow/core/framework/register_types.h @@ -60,6 +60,7 @@ limitations under the License. #define TF_CALL_float(m) m(float) #define TF_CALL_double(m) m(double) #define TF_CALL_int32(m) m(::tensorflow::int32) +#define TF_CALL_uint32(m) m(::tensorflow::uint32) #define TF_CALL_uint8(m) m(::tensorflow::uint8) #define TF_CALL_int16(m) m(::tensorflow::int16) @@ -68,6 +69,7 @@ limitations under the License. #define TF_CALL_resource(m) m(::tensorflow::ResourceHandle) #define TF_CALL_complex64(m) m(::tensorflow::complex64) #define TF_CALL_int64(m) m(::tensorflow::int64) +#define TF_CALL_uint64(m) m(::tensorflow::uint64) #define TF_CALL_bool(m) m(bool) #define TF_CALL_qint8(m) m(::tensorflow::qint8) @@ -87,6 +89,7 @@ limitations under the License. #define TF_CALL_float(m) m(float) #define TF_CALL_double(m) #define TF_CALL_int32(m) m(::tensorflow::int32) +#define TF_CALL_uint32(m) #define TF_CALL_uint8(m) #define TF_CALL_int16(m) @@ -95,6 +98,7 @@ limitations under the License. #define TF_CALL_resource(m) #define TF_CALL_complex64(m) #define TF_CALL_int64(m) m(::tensorflow::int64) +#define TF_CALL_uint64(m) #define TF_CALL_bool(m) m(bool) #define TF_CALL_qint8(m) m(::tensorflow::qint8) @@ -114,6 +118,7 @@ limitations under the License. #define TF_CALL_float(m) m(float) #define TF_CALL_double(m) #define TF_CALL_int32(m) m(::tensorflow::int32) +#define TF_CALL_uint32(m) #define TF_CALL_uint8(m) #define TF_CALL_int16(m) @@ -122,6 +127,7 @@ limitations under the License. 
#define TF_CALL_resource(m) #define TF_CALL_complex64(m) #define TF_CALL_int64(m) +#define TF_CALL_uint64(m) #define TF_CALL_bool(m) m(bool) #define TF_CALL_qint8(m) diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index a5b5ef0acc..24b7b08ebc 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -288,6 +288,7 @@ PROTO_TRAITS(double, double, double); PROTO_TRAITS(int32, int32, int); PROTO_TRAITS(uint8, int32, int); PROTO_TRAITS(uint16, int32, int); +PROTO_TRAITS(uint32, uint32, uint32); PROTO_TRAITS(int16, int32, int); PROTO_TRAITS(int8, int32, int); PROTO_TRAITS(bool, bool, bool); @@ -312,6 +313,20 @@ struct ProtoHelper { } }; +template <> +struct ProtoHelper { + static const uint64* Begin(const TensorProto& proto) { + return reinterpret_cast(proto.uint64_val().begin()); + } + static size_t NumElements(const TensorProto& proto) { + return proto.uint64_val().size(); + } + static void Fill(const uint64* data, size_t n, TensorProto* proto) { + protobuf::RepeatedField copy(data, data + n); + proto->mutable_uint64_val()->Swap(&copy); + } +}; + template <> struct ProtoHelper { static protobuf::RepeatedPtrField::const_iterator Begin( @@ -649,6 +664,8 @@ bool Tensor::RefCountIsOne() const { CASE(int32, SINGLE_ARG(STMTS)) \ CASE(uint8, SINGLE_ARG(STMTS)) \ CASE(uint16, SINGLE_ARG(STMTS)) \ + CASE(uint32, SINGLE_ARG(STMTS)) \ + CASE(uint64, SINGLE_ARG(STMTS)) \ CASE(int16, SINGLE_ARG(STMTS)) \ CASE(int8, SINGLE_ARG(STMTS)) \ CASE(string, SINGLE_ARG(STMTS)) \ @@ -925,6 +942,9 @@ string Tensor::SummarizeValue(int64 max_entries) const { case DT_DOUBLE: return SummarizeArray(limit, num_elts, shape_, data); break; + case DT_UINT32: + return SummarizeArray(limit, num_elts, shape_, data); + break; case DT_INT32: return SummarizeArray(limit, num_elts, shape_, data); break; @@ -944,6 +964,9 @@ string Tensor::SummarizeValue(int64 max_entries) const { case DT_QINT8: return SummarizeArray(limit, num_elts, shape_, 
data); break; + case DT_UINT64: + return SummarizeArray(limit, num_elts, shape_, data); + break; case DT_INT64: return SummarizeArray(limit, num_elts, shape_, data); break; diff --git a/tensorflow/core/framework/tensor.proto b/tensorflow/core/framework/tensor.proto index 7e4af7a645..6dab325969 100644 --- a/tensorflow/core/framework/tensor.proto +++ b/tensorflow/core/framework/tensor.proto @@ -75,6 +75,12 @@ message TensorProto { // DT_VARIANT repeated VariantTensorDataProto variant_val = 15; + + // DT_UINT32 + repeated uint32 uint32_val = 16 [packed = true]; + + // DT_UINT64 + repeated uint64 uint64_val = 17 [packed = true]; }; // Protocol buffer representing the serialization format of DT_VARIANT tensors. diff --git a/tensorflow/core/framework/types.cc b/tensorflow/core/framework/types.cc index 1a5fd10f52..cc86871cae 100644 --- a/tensorflow/core/framework/types.cc +++ b/tensorflow/core/framework/types.cc @@ -61,6 +61,8 @@ string DataTypeString(DataType dtype) { return "double"; case DT_INT32: return "int32"; + case DT_UINT32: + return "uint32"; case DT_UINT8: return "uint8"; case DT_UINT16: @@ -77,6 +79,8 @@ string DataTypeString(DataType dtype) { return "complex128"; case DT_INT64: return "int64"; + case DT_UINT64: + return "uint64"; case DT_BOOL: return "bool"; case DT_QINT8: @@ -124,6 +128,9 @@ bool DataTypeFromString(StringPiece sp, DataType* dt) { } else if (sp == "int32") { *dt = DT_INT32; return true; + } else if (sp == "uint32") { + *dt = DT_UINT32; + return true; } else if (sp == "uint8") { *dt = DT_UINT8; return true; @@ -148,6 +155,9 @@ bool DataTypeFromString(StringPiece sp, DataType* dt) { } else if (sp == "int64") { *dt = DT_INT64; return true; + } else if (sp == "uint64") { + *dt = DT_UINT64; + return true; } else if (sp == "bool") { *dt = DT_BOOL; return true; @@ -199,14 +209,15 @@ DataTypeVector AllTypes() { return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_UINT8, DT_INT16, DT_UINT16, DT_INT8, DT_STRING, DT_COMPLEX64, DT_COMPLEX128, DT_INT64, DT_BOOL, 
DT_QINT8, DT_QUINT8, DT_QINT16, - DT_QUINT16, DT_QINT32, DT_HALF, DT_RESOURCE, DT_VARIANT}; + DT_QUINT16, DT_QINT32, DT_HALF, DT_RESOURCE, DT_VARIANT, + DT_UINT32, DT_UINT64}; } #if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION) DataTypeVector RealNumberTypes() { - return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, - DT_INT16, DT_INT8, DT_UINT16, DT_HALF}; + return {DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64, DT_UINT8, DT_INT16, + DT_INT8, DT_UINT16, DT_HALF, DT_UINT32, DT_UINT64}; } DataTypeVector QuantizedTypes() { @@ -220,9 +231,10 @@ DataTypeVector RealAndQuantizedTypes() { } DataTypeVector NumberTypes() { - return {DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_UINT8, - DT_UINT16, DT_INT16, DT_INT8, DT_COMPLEX64, DT_COMPLEX128, - DT_QINT8, DT_QUINT8, DT_QINT32, DT_HALF}; + return {DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, + DT_UINT8, DT_UINT16, DT_INT16, DT_INT8, + DT_COMPLEX64, DT_COMPLEX128, DT_QINT8, DT_QUINT8, + DT_QINT32, DT_HALF, DT_UINT32, DT_UINT64}; } #elif defined(__ANDROID_TYPES_FULL__) @@ -271,6 +283,7 @@ bool DataTypeCanUseMemcpy(DataType dt) { case DT_FLOAT: case DT_DOUBLE: case DT_INT32: + case DT_UINT32: case DT_UINT8: case DT_UINT16: case DT_INT16: @@ -278,6 +291,7 @@ bool DataTypeCanUseMemcpy(DataType dt) { case DT_COMPLEX64: case DT_COMPLEX128: case DT_INT64: + case DT_UINT64: case DT_BOOL: case DT_QINT8: case DT_QUINT8: @@ -312,7 +326,9 @@ bool DataTypeIsInteger(DataType dt) { case DT_INT16: case DT_UINT16: case DT_INT32: + case DT_UINT32: case DT_INT64: + case DT_UINT64: return true; default: return false; @@ -331,6 +347,12 @@ int DataTypeSize(DataType dt) { // bitcast. TF_CALL_qint16(CASE); TF_CALL_quint16(CASE); + + // uint32 and uint64 aren't included in TF_CALL_POD_TYPES because we + // don't want to define kernels for them at this stage to avoid binary + // bloat. 
+ TF_CALL_uint32(CASE); + TF_CALL_uint64(CASE); default: return 0; } diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h index 3b4362bcc9..300a57e948 100644 --- a/tensorflow/core/framework/types.h +++ b/tensorflow/core/framework/types.h @@ -187,6 +187,7 @@ struct EnumToDataType {}; // Specializations below MATCH_TYPE_AND_ENUM(float, DT_FLOAT); MATCH_TYPE_AND_ENUM(double, DT_DOUBLE); MATCH_TYPE_AND_ENUM(int32, DT_INT32); +MATCH_TYPE_AND_ENUM(uint32, DT_UINT32); MATCH_TYPE_AND_ENUM(uint16, DT_UINT16); MATCH_TYPE_AND_ENUM(uint8, DT_UINT8); MATCH_TYPE_AND_ENUM(int16, DT_INT16); @@ -195,6 +196,7 @@ MATCH_TYPE_AND_ENUM(string, DT_STRING); MATCH_TYPE_AND_ENUM(complex64, DT_COMPLEX64); MATCH_TYPE_AND_ENUM(complex128, DT_COMPLEX128); MATCH_TYPE_AND_ENUM(int64, DT_INT64); +MATCH_TYPE_AND_ENUM(uint64, DT_UINT64); MATCH_TYPE_AND_ENUM(bool, DT_BOOL); MATCH_TYPE_AND_ENUM(qint8, DT_QINT8); MATCH_TYPE_AND_ENUM(quint8, DT_QUINT8); diff --git a/tensorflow/core/framework/types.proto b/tensorflow/core/framework/types.proto index 1beb2a1aa2..e003fd0010 100644 --- a/tensorflow/core/framework/types.proto +++ b/tensorflow/core/framework/types.proto @@ -35,9 +35,8 @@ enum DataType { DT_HALF = 19; DT_RESOURCE = 20; DT_VARIANT = 21; // Arbitrary C++ data types - - // TODO(josh11b): DT_GENERIC_PROTO = ??; - // TODO(jeff,josh11b): DT_UINT64? DT_UINT32? + DT_UINT32 = 22; + DT_UINT64 = 23; // Do not use! These are only for parameters. Every enum above // should have a corresponding value below (verified by types_test). 
@@ -62,5 +61,14 @@ enum DataType { DT_HALF_REF = 119; DT_RESOURCE_REF = 120; DT_VARIANT_REF = 121; + DT_UINT32_REF = 122; + DT_UINT64_REF = 123; } -// LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.h,https://www.tensorflow.org/code/tensorflow/go/tensor.go) +// LINT.ThenChange( +// https://www.tensorflow.org/code/tensorflow/c/c_api.h, +// https://www.tensorflow.org/code/tensorflow/go/tensor.go, +// https://www.tensorflow.org/code/tensorflow/core/framework/tensor.cc, +// https://www.tensorflow.org/code/tensorflow/core/framework/types.h, +// https://www.tensorflow.org/code/tensorflow/core/framework/types.cc, +// https://www.tensorflow.org/code/tensorflow/python/framework/dtypes.py, +// https://www.tensorflow.org/code/tensorflow/python/framework/function.py) diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index e8fa21a62b..36a74c0081 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -39,6 +39,7 @@ const ( Float DataType = C.TF_FLOAT Double DataType = C.TF_DOUBLE Int32 DataType = C.TF_INT32 + Uint32 DataType = C.TF_UINT32 Uint8 DataType = C.TF_UINT8 Int16 DataType = C.TF_INT16 Int8 DataType = C.TF_INT8 @@ -46,6 +47,7 @@ const ( Complex64 DataType = C.TF_COMPLEX64 Complex DataType = C.TF_COMPLEX Int64 DataType = C.TF_INT64 + Uint64 DataType = C.TF_UINT64 Bool DataType = C.TF_BOOL Qint8 DataType = C.TF_QINT8 Quint8 DataType = C.TF_QUINT8 @@ -217,12 +219,14 @@ var types = []struct { {reflect.TypeOf(float32(0)), C.TF_FLOAT}, {reflect.TypeOf(float64(0)), C.TF_DOUBLE}, {reflect.TypeOf(int32(0)), C.TF_INT32}, + {reflect.TypeOf(uint32(0)), C.TF_UINT32}, {reflect.TypeOf(uint8(0)), C.TF_UINT8}, {reflect.TypeOf(int16(0)), C.TF_INT16}, {reflect.TypeOf(int8(0)), C.TF_INT8}, {reflect.TypeOf(""), C.TF_STRING}, {reflect.TypeOf(complex(float32(0), float32(0))), C.TF_COMPLEX64}, {reflect.TypeOf(int64(0)), C.TF_INT64}, + {reflect.TypeOf(uint64(0)), C.TF_UINT64}, {reflect.TypeOf(false), C.TF_BOOL}, {reflect.TypeOf(uint16(0)), 
C.TF_UINT16}, {reflect.TypeOf(complex(float64(0), float64(0))), C.TF_COMPLEX128}, diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index f3bdea92dd..f21f1f822c 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -213,6 +213,8 @@ _allowed_symbols.extend([ 'quint16', 'quint8', 'string', + 'uint64', + 'uint32', 'uint16', 'uint8', 'resource', diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py index 43535a593e..db124ab12a 100644 --- a/tensorflow/python/framework/dtypes.py +++ b/tensorflow/python/framework/dtypes.py @@ -37,6 +37,8 @@ class DType(object): * `tf.int8`: 8-bit signed integer. * `tf.uint8`: 8-bit unsigned integer. * `tf.uint16`: 16-bit unsigned integer. + * `tf.uint32`: 32-bit unsigned integer. + * `tf.uint64`: 64-bit unsigned integer. * `tf.int16`: 16-bit signed integer. * `tf.int32`: 32-bit signed integer. * `tf.int64`: 64-bit signed integer. @@ -318,6 +320,8 @@ double = float64 int32 = DType(types_pb2.DT_INT32) uint8 = DType(types_pb2.DT_UINT8) uint16 = DType(types_pb2.DT_UINT16) +uint32 = DType(types_pb2.DT_UINT32) +uint64 = DType(types_pb2.DT_UINT64) int16 = DType(types_pb2.DT_INT16) int8 = DType(types_pb2.DT_INT8) string = DType(types_pb2.DT_STRING) @@ -339,6 +343,7 @@ float32_ref = DType(types_pb2.DT_FLOAT_REF) float64_ref = DType(types_pb2.DT_DOUBLE_REF) double_ref = float64_ref int32_ref = DType(types_pb2.DT_INT32_REF) +uint32_ref = DType(types_pb2.DT_UINT32_REF) uint8_ref = DType(types_pb2.DT_UINT8_REF) uint16_ref = DType(types_pb2.DT_UINT16_REF) int16_ref = DType(types_pb2.DT_INT16_REF) @@ -347,6 +352,7 @@ string_ref = DType(types_pb2.DT_STRING_REF) complex64_ref = DType(types_pb2.DT_COMPLEX64_REF) complex128_ref = DType(types_pb2.DT_COMPLEX128_REF) int64_ref = DType(types_pb2.DT_INT64_REF) +uint64_ref = DType(types_pb2.DT_UINT64_REF) bool_ref = DType(types_pb2.DT_BOOL_REF) qint8_ref = DType(types_pb2.DT_QINT8_REF) quint8_ref = 
DType(types_pb2.DT_QUINT8_REF) @@ -365,6 +371,8 @@ _INTERN_TABLE = { types_pb2.DT_INT32: int32, types_pb2.DT_UINT8: uint8, types_pb2.DT_UINT16: uint16, + types_pb2.DT_UINT32: uint32, + types_pb2.DT_UINT64: uint64, types_pb2.DT_INT16: int16, types_pb2.DT_INT8: int8, types_pb2.DT_STRING: string, @@ -384,6 +392,7 @@ _INTERN_TABLE = { types_pb2.DT_FLOAT_REF: float32_ref, types_pb2.DT_DOUBLE_REF: float64_ref, types_pb2.DT_INT32_REF: int32_ref, + types_pb2.DT_UINT32_REF: uint32_ref, types_pb2.DT_UINT8_REF: uint8_ref, types_pb2.DT_UINT16_REF: uint16_ref, types_pb2.DT_INT16_REF: int16_ref, @@ -392,6 +401,7 @@ _INTERN_TABLE = { types_pb2.DT_COMPLEX64_REF: complex64_ref, types_pb2.DT_COMPLEX128_REF: complex128_ref, types_pb2.DT_INT64_REF: int64_ref, + types_pb2.DT_UINT64_REF: uint64_ref, types_pb2.DT_BOOL_REF: bool_ref, types_pb2.DT_QINT8_REF: qint8_ref, types_pb2.DT_QUINT8_REF: quint8_ref, @@ -412,6 +422,8 @@ _TYPE_TO_STRING = { types_pb2.DT_INT32: "int32", types_pb2.DT_UINT8: "uint8", types_pb2.DT_UINT16: "uint16", + types_pb2.DT_UINT32: "uint32", + types_pb2.DT_UINT64: "uint64", types_pb2.DT_INT16: "int16", types_pb2.DT_INT8: "int8", types_pb2.DT_STRING: "string", @@ -431,6 +443,7 @@ _TYPE_TO_STRING = { types_pb2.DT_FLOAT_REF: "float32_ref", types_pb2.DT_DOUBLE_REF: "float64_ref", types_pb2.DT_INT32_REF: "int32_ref", + types_pb2.DT_UINT32_REF: "uint32_ref", types_pb2.DT_UINT8_REF: "uint8_ref", types_pb2.DT_UINT16_REF: "uint16_ref", types_pb2.DT_INT16_REF: "int16_ref", @@ -439,6 +452,7 @@ _TYPE_TO_STRING = { types_pb2.DT_COMPLEX64_REF: "complex64_ref", types_pb2.DT_COMPLEX128_REF: "complex128_ref", types_pb2.DT_INT64_REF: "int64_ref", + types_pb2.DT_UINT64_REF: "uint64_ref", types_pb2.DT_BOOL_REF: "bool_ref", types_pb2.DT_QINT8_REF: "qint8_ref", types_pb2.DT_QUINT8_REF: "quint8_ref", @@ -484,6 +498,8 @@ _NP_TO_TF = frozenset([ (np.int64, int64), (np.uint8, uint8), (np.uint16, uint16), + (np.uint32, uint32), + (np.uint64, uint64), (np.int16, int16), (np.int8, int8), 
(np.complex64, complex64), @@ -504,6 +520,8 @@ _TF_TO_NP = { types_pb2.DT_INT32: np.int32, types_pb2.DT_UINT8: np.uint8, types_pb2.DT_UINT16: np.uint16, + types_pb2.DT_UINT32: np.uint32, + types_pb2.DT_UINT64: np.uint64, types_pb2.DT_INT16: np.int16, types_pb2.DT_INT8: np.int8, # NOTE(touts): For strings we use np.object as it supports variable length @@ -525,6 +543,7 @@ _TF_TO_NP = { types_pb2.DT_FLOAT_REF: np.float32, types_pb2.DT_DOUBLE_REF: np.float64, types_pb2.DT_INT32_REF: np.int32, + types_pb2.DT_UINT32_REF: np.uint32, types_pb2.DT_UINT8_REF: np.uint8, types_pb2.DT_UINT16_REF: np.uint16, types_pb2.DT_INT16_REF: np.int16, @@ -533,6 +552,7 @@ _TF_TO_NP = { types_pb2.DT_COMPLEX64_REF: np.complex64, types_pb2.DT_COMPLEX128_REF: np.complex128, types_pb2.DT_INT64_REF: np.int64, + types_pb2.DT_UINT64_REF: np.uint64, types_pb2.DT_BOOL_REF: np.bool, types_pb2.DT_QINT8_REF: _np_qint8, types_pb2.DT_QUINT8_REF: _np_quint8, diff --git a/tensorflow/python/framework/dtypes_test.py b/tensorflow/python/framework/dtypes_test.py index 1e84f1b656..67842e14b1 100644 --- a/tensorflow/python/framework/dtypes_test.py +++ b/tensorflow/python/framework/dtypes_test.py @@ -268,6 +268,9 @@ class TypesTest(test_util.TensorFlowTestCase): self.assertEquals(dtype.min, 0) self.assertEquals(dtype.max, 4294967295) if numpy_dtype == np.uint32: + self.assertEquals(dtype.min, 0) + self.assertEquals(dtype.max, 4294967295) + if numpy_dtype == np.uint64: self.assertEquals(dtype.min, 0) self.assertEquals(dtype.max, 18446744073709551615) if numpy_dtype in (np.float16, np.float32, np.float64): diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index 7068e72009..cef3f8d4c4 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -1002,6 +1002,8 @@ _DTYPE_TO_STR = { dtypes.int32: "i32", dtypes.uint8: "i8", dtypes.uint16: "u16", + dtypes.uint32: "u32", + dtypes.uint64: "u64", dtypes.int16: "i16", dtypes.int8: "i8", 
dtypes.string: "s", diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc index b1a5a37924..cf2c2e6eb0 100644 --- a/tensorflow/python/lib/core/ndarray_tensor.cc +++ b/tensorflow/python/lib/core/ndarray_tensor.cc @@ -88,6 +88,12 @@ Status PyArray_TYPE_to_TF_DataType(PyArrayObject* array, case NPY_UINT16: *out_tf_datatype = TF_UINT16; break; + case NPY_UINT32: + *out_tf_datatype = TF_UINT32; + break; + case NPY_UINT64: + *out_tf_datatype = TF_UINT64; + break; case NPY_INT8: *out_tf_datatype = TF_INT8; break; diff --git a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc index f468e0b70e..82c45f5a31 100644 --- a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc +++ b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc @@ -120,6 +120,9 @@ Status TF_DataType_to_PyArray_TYPE(TF_DataType tf_datatype, case TF_INT32: *out_pyarray_type = NPY_INT32; break; + case TF_UINT32: + *out_pyarray_type = NPY_UINT32; + break; case TF_UINT8: *out_pyarray_type = NPY_UINT8; break; @@ -135,6 +138,9 @@ Status TF_DataType_to_PyArray_TYPE(TF_DataType tf_datatype, case TF_INT64: *out_pyarray_type = NPY_INT64; break; + case TF_UINT64: + *out_pyarray_type = NPY_UINT64; + break; case TF_BOOL: *out_pyarray_type = NPY_BOOL; break; diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 6e03f9e8fb..d77f8fd253 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -500,6 +500,14 @@ tf_module { name: "uint16" mtype: "" } + member { + name: "uint32" + mtype: "" + } + member { + name: "uint64" + mtype: "" + } member { name: "uint8" mtype: "" -- GitLab From 8776bfdf07be8ce95b9f1f75742b7bb8c9e30e35 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 10 Oct 2017 08:51:00 -0700 Subject: [PATCH 221/909] Internal change PiperOrigin-RevId: 171683977 --- tensorflow/contrib/eager/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 1a63c901a2..7ef163c707 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -9,6 +9,7 @@ py_library( name = "tfe", srcs = ["tfe.py"], srcs_version = "PY2AND3", + visibility = ["//visibility:public"], deps = [ ":datasets", ":metrics", -- GitLab From cf3cddc2089d310360f2332ac4df2b14344f6cde Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 09:19:09 -0700 Subject: [PATCH 222/909] Update ops-related pbtxt files. PiperOrigin-RevId: 171688013 --- .../core/ops/compat/ops_history.v1.pbtxt | 15800 ++++++++++++---- tensorflow/core/ops/ops.pbtxt | 252 + 2 files changed, 11862 insertions(+), 4190 deletions(-) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index a449fc1452..1eafbe138c 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -77,6 +77,46 @@ op { } } } +op { + name: "AccumulatorApplyGradient" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + input_arg { + name: "local_step" + type: DT_INT64 + } + input_arg { + name: "gradient" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "AccumulatorNumAccumulated" input_arg { @@ -139,6 +179,46 @@ op { } } } +op { + name: "AccumulatorTakeGradient" + input_arg { + 
name: "handle" + type: DT_STRING + is_ref: true + } + input_arg { + name: "num_required" + type: DT_INT32 + } + output_arg { + name: "average" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "Acos" input_arg { @@ -346,6 +426,51 @@ op { is_aggregate: true is_commutative: true } +op { + name: "AddN" + input_arg { + name: "inputs" + type_attr: "T" + number_attr: "N" + } + output_arg { + name: "sum" + type_attr: "T" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + type: DT_VARIANT + } + } + } + is_aggregate: true + is_commutative: true +} op { name: "AddSparseToTensorsMap" input_arg { @@ -745,7 +870,7 @@ op { } } op { - name: "ApplyAdagrad" + name: "ApplyAdadelta" input_arg { name: "var" type_attr: "T" @@ -756,10 +881,23 @@ op { type_attr: "T" is_ref: true } + input_arg { + name: "accum_update" + type_attr: "T" + is_ref: true + } input_arg { name: "lr" type_attr: "T" } + input_arg { + name: "rho" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } input_arg { name: "grad" type_attr: "T" @@ -788,6 +926,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -800,42 +940,25 @@ op { } } op { - name: "ApplyAdagradDA" + name: "ApplyAdagrad" input_arg { name: "var" type_attr: "T" is_ref: 
true } input_arg { - name: "gradient_accumulator" - type_attr: "T" - is_ref: true - } - input_arg { - name: "gradient_squared_accumulator" + name: "accum" type_attr: "T" is_ref: true } - input_arg { - name: "grad" - type_attr: "T" - } input_arg { name: "lr" type_attr: "T" } input_arg { - name: "l1" - type_attr: "T" - } - input_arg { - name: "l2" + name: "grad" type_attr: "T" } - input_arg { - name: "global_step" - type: DT_INT64 - } output_arg { name: "out" type_attr: "T" @@ -872,46 +995,21 @@ op { } } op { - name: "ApplyAdam" + name: "ApplyAdagrad" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "m" - type_attr: "T" - is_ref: true - } - input_arg { - name: "v" + name: "accum" type_attr: "T" is_ref: true } - input_arg { - name: "beta1_power" - type_attr: "T" - } - input_arg { - name: "beta2_power" - type_attr: "T" - } input_arg { name: "lr" type_attr: "T" } - input_arg { - name: "beta1" - type_attr: "T" - } - input_arg { - name: "beta2" - type_attr: "T" - } - input_arg { - name: "epsilon" - type_attr: "T" - } input_arg { name: "grad" type_attr: "T" @@ -940,6 +1038,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -952,28 +1052,24 @@ op { } } op { - name: "ApplyAdam" + name: "ApplyAdagradDA" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "m" + name: "gradient_accumulator" type_attr: "T" is_ref: true } input_arg { - name: "v" + name: "gradient_squared_accumulator" type_attr: "T" is_ref: true } input_arg { - name: "beta1_power" - type_attr: "T" - } - input_arg { - name: "beta2_power" + name: "grad" type_attr: "T" } input_arg { @@ -981,20 +1077,16 @@ op { type_attr: "T" } input_arg { - name: "beta1" - type_attr: "T" - } - input_arg { - name: "beta2" + name: "l1" type_attr: "T" } input_arg { - name: "epsilon" + name: "l2" type_attr: "T" } input_arg { - name: "grad" - type_attr: "T" + name: "global_step" + type: DT_INT64 } output_arg { name: "out" @@ -1030,55 +1122,43 
@@ op { b: false } } - attr { - name: "use_nesterov" - type: "bool" - default_value { - b: false - } - } } op { - name: "ApplyCenteredRMSProp" + name: "ApplyAdagradDA" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "mg" + name: "gradient_accumulator" type_attr: "T" is_ref: true } input_arg { - name: "ms" + name: "gradient_squared_accumulator" type_attr: "T" is_ref: true } input_arg { - name: "mom" + name: "grad" type_attr: "T" - is_ref: true } input_arg { name: "lr" type_attr: "T" } input_arg { - name: "rho" - type_attr: "T" - } - input_arg { - name: "momentum" + name: "l1" type_attr: "T" } input_arg { - name: "epsilon" + name: "l2" type_attr: "T" } input_arg { - name: "grad" - type_attr: "T" + name: "global_step" + type: DT_INT64 } output_arg { name: "out" @@ -1104,6 +1184,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1116,24 +1198,28 @@ op { } } op { - name: "ApplyFtrl" + name: "ApplyAdam" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "accum" + name: "m" type_attr: "T" is_ref: true } input_arg { - name: "linear" + name: "v" type_attr: "T" is_ref: true } input_arg { - name: "grad" + name: "beta1_power" + type_attr: "T" + } + input_arg { + name: "beta2_power" type_attr: "T" } input_arg { @@ -1141,15 +1227,19 @@ op { type_attr: "T" } input_arg { - name: "l1" + name: "beta1" type_attr: "T" } input_arg { - name: "l2" + name: "beta2" type_attr: "T" } input_arg { - name: "lr_power" + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } output_arg { @@ -1188,24 +1278,28 @@ op { } } op { - name: "ApplyFtrlV2" + name: "ApplyAdam" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "accum" + name: "m" type_attr: "T" is_ref: true } input_arg { - name: "linear" + name: "v" type_attr: "T" is_ref: true } input_arg { - name: "grad" + name: "beta1_power" + type_attr: "T" + } + input_arg { + name: "beta2_power" 
type_attr: "T" } input_arg { @@ -1213,19 +1307,19 @@ op { type_attr: "T" } input_arg { - name: "l1" + name: "beta1" type_attr: "T" } input_arg { - name: "l2" + name: "beta2" type_attr: "T" } input_arg { - name: "l2_shrinkage" + name: "epsilon" type_attr: "T" } input_arg { - name: "lr_power" + name: "grad" type_attr: "T" } output_arg { @@ -1262,20 +1356,57 @@ op { b: false } } + attr { + name: "use_nesterov" + type: "bool" + default_value { + b: false + } + } } op { - name: "ApplyGradientDescent" + name: "ApplyAdam" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "alpha" + name: "m" + type_attr: "T" + is_ref: true + } + input_arg { + name: "v" type_attr: "T" + is_ref: true } input_arg { - name: "delta" + name: "beta1_power" + type_attr: "T" + } + input_arg { + name: "beta2_power" + type_attr: "T" + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "beta1" + type_attr: "T" + } + input_arg { + name: "beta2" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } output_arg { @@ -1302,6 +1433,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1312,16 +1445,33 @@ op { b: false } } + attr { + name: "use_nesterov" + type: "bool" + default_value { + b: false + } + } } op { - name: "ApplyMomentum" + name: "ApplyCenteredRMSProp" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "accum" + name: "mg" + type_attr: "T" + is_ref: true + } + input_arg { + name: "ms" + type_attr: "T" + is_ref: true + } + input_arg { + name: "mom" type_attr: "T" is_ref: true } @@ -1330,13 +1480,21 @@ op { type_attr: "T" } input_arg { - name: "grad" + name: "rho" type_attr: "T" } input_arg { name: "momentum" type_attr: "T" } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } output_arg { name: "out" type_attr: "T" @@ -1371,23 +1529,26 @@ op { b: false } } - attr { - 
name: "use_nesterov" - type: "bool" - default_value { - b: false - } - } } op { - name: "ApplyProximalAdagrad" + name: "ApplyCenteredRMSProp" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "accum" + name: "mg" + type_attr: "T" + is_ref: true + } + input_arg { + name: "ms" + type_attr: "T" + is_ref: true + } + input_arg { + name: "mom" type_attr: "T" is_ref: true } @@ -1396,11 +1557,15 @@ op { type_attr: "T" } input_arg { - name: "l1" + name: "rho" type_attr: "T" } input_arg { - name: "l2" + name: "momentum" + type_attr: "T" + } + input_arg { + name: "epsilon" type_attr: "T" } input_arg { @@ -1431,6 +1596,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1443,14 +1610,28 @@ op { } } op { - name: "ApplyProximalGradientDescent" + name: "ApplyFtrl" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "alpha" + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "linear" + type_attr: "T" + is_ref: true + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "lr" type_attr: "T" } input_arg { @@ -1462,7 +1643,7 @@ op { type_attr: "T" } input_arg { - name: "delta" + name: "lr_power" type_attr: "T" } output_arg { @@ -1501,40 +1682,40 @@ op { } } op { - name: "ApplyRMSProp" + name: "ApplyFtrl" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "ms" + name: "accum" type_attr: "T" is_ref: true } input_arg { - name: "mom" + name: "linear" type_attr: "T" is_ref: true } input_arg { - name: "lr" + name: "grad" type_attr: "T" } input_arg { - name: "rho" + name: "lr" type_attr: "T" } input_arg { - name: "momentum" + name: "l1" type_attr: "T" } input_arg { - name: "epsilon" + name: "l2" type_attr: "T" } input_arg { - name: "grad" + name: "lr_power" type_attr: "T" } output_arg { @@ -1561,6 +1742,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1573,18 +1756,50 @@ op { } } 
op { - name: "ApproximateEqual" + name: "ApplyFtrlV2" input_arg { - name: "x" + name: "var" type_attr: "T" + is_ref: true } input_arg { - name: "y" + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "linear" + type_attr: "T" + is_ref: true + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "l2_shrinkage" + type_attr: "T" + } + input_arg { + name: "lr_power" type_attr: "T" } output_arg { - name: "z" - type: DT_BOOL + name: "out" + type_attr: "T" + is_ref: true } attr { name: "T" @@ -1609,27 +1824,58 @@ op { } } attr { - name: "tolerance" - type: "float" + name: "use_locking" + type: "bool" default_value { - f: 1e-05 + b: false } } - is_commutative: true } op { - name: "ArgMax" + name: "ApplyFtrlV2" input_arg { - name: "input" + name: "var" type_attr: "T" + is_ref: true } input_arg { - name: "dimension" - type_attr: "Tidx" + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "linear" + type_attr: "T" + is_ref: true + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "l2_shrinkage" + type_attr: "T" + } + input_arg { + name: "lr_power" + type_attr: "T" } output_arg { - name: "output" - type: DT_INT64 + name: "out" + type_attr: "T" + is_ref: true } attr { name: "T" @@ -1650,36 +1896,38 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "Tidx" - type: "type" + name: "use_locking" + type: "bool" default_value { - type: DT_INT32 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } + b: false } } } op { - name: "ArgMax" + name: "ApplyGradientDescent" input_arg { - name: "input" + name: "var" type_attr: "T" + is_ref: true } 
input_arg { - name: "dimension" - type_attr: "Tidx" + name: "alpha" + type_attr: "T" + } + input_arg { + name: "delta" + type_attr: "T" } output_arg { - name: "output" - type_attr: "output_type" + name: "out" + type_attr: "T" + is_ref: true } attr { name: "T" @@ -1704,45 +1952,32 @@ op { } } attr { - name: "Tidx" - type: "type" - default_value { - type: DT_INT32 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } - } - attr { - name: "output_type" - type: "type" + name: "use_locking" + type: "bool" default_value { - type: DT_INT64 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } + b: false } } } op { - name: "ArgMin" + name: "ApplyGradientDescent" input_arg { - name: "input" + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "alpha" type_attr: "T" } input_arg { - name: "dimension" - type_attr: "Tidx" + name: "delta" + type_attr: "T" } output_arg { - name: "output" - type: DT_INT64 + name: "out" + type_attr: "T" + is_ref: true } attr { name: "T" @@ -1763,36 +1998,47 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "Tidx" - type: "type" + name: "use_locking" + type: "bool" default_value { - type: DT_INT32 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } + b: false } } } op { - name: "ArgMin" + name: "ApplyMomentum" input_arg { - name: "input" + name: "var" type_attr: "T" + is_ref: true } input_arg { - name: "dimension" - type_attr: "Tidx" + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "momentum" + type_attr: "T" } output_arg { - name: "output" - type_attr: "output_type" + name: "out" + type_attr: "T" + is_ref: true } attr { name: "T" @@ -1817,217 +2063,299 @@ op { } } attr { - name: "Tidx" - type: "type" + name: "use_locking" + type: "bool" default_value { - type: DT_INT32 - } - allowed_values { - 
list { - type: DT_INT32 - type: DT_INT64 - } + b: false } } attr { - name: "output_type" - type: "type" + name: "use_nesterov" + type: "bool" default_value { - type: DT_INT64 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } + b: false } } } op { - name: "AsString" + name: "ApplyMomentum" input_arg { - name: "input" + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "momentum" type_attr: "T" } output_arg { - name: "output" - type: DT_STRING + name: "out" + type_attr: "T" + is_ref: true } attr { name: "T" type: "type" allowed_values { list { - type: DT_INT32 - type: DT_INT64 - type: DT_COMPLEX64 type: DT_FLOAT type: DT_DOUBLE - type: DT_BOOL + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "precision" - type: "int" - default_value { - i: -1 - } - } - attr { - name: "scientific" + name: "use_locking" type: "bool" default_value { b: false } } attr { - name: "shortest" + name: "use_nesterov" type: "bool" default_value { b: false } } - attr { - name: "width" - type: "int" - default_value { - i: -1 - } - } - attr { - name: "fill" - type: "string" - default_value { - s: "" - } - } } op { - name: "Asin" + name: "ApplyProximalAdagrad" input_arg { - name: "x" + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } output_arg { - name: "y" + name: "out" type_attr: "T" + is_ref: true } attr { name: "T" type: "type" 
allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 type: DT_COMPLEX64 type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "Asinh" + name: "ApplyProximalAdagrad" input_arg { - name: "x" + name: "var" type_attr: "T" + is_ref: true } - output_arg { - name: "y" + input_arg { + name: "accum" type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + output_arg { + name: "out" + type_attr: "T" + is_ref: true } attr { name: "T" type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 type: DT_COMPLEX64 type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } -} -op { - name: "Assert" - input_arg { - name: "condition" - type: DT_BOOL - } - input_arg { - name: "data" - type_list_attr: "T" - } - attr { - name: "T" - type: "list(type)" - has_minimum: true - minimum: 1 - } attr { - name: "summarize" - type: "int" + name: "use_locking" + type: "bool" default_value { - i: 3 + b: false } } - is_stateful: true } op { - name: "Assign" + name: "ApplyProximalGradientDescent" input_arg { - name: "ref" + name: "var" type_attr: "T" is_ref: true } input_arg { - name: "value" + name: "alpha" + type_attr: "T" + } + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "delta" type_attr: "T" } output_arg { - name: "output_ref" + name: "out" type_attr: "T" is_ref: true } attr { name: "T" 
type: "type" - } - attr { - name: "validate_shape" - type: "bool" - default_value { - b: true + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } } } attr { name: "use_locking" type: "bool" default_value { - b: true + b: false } } - allows_uninitialized_input: true } op { - name: "AssignAdd" + name: "ApplyProximalGradientDescent" input_arg { - name: "ref" + name: "var" type_attr: "T" is_ref: true } input_arg { - name: "value" + name: "alpha" + type_attr: "T" + } + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "delta" type_attr: "T" } output_arg { - name: "output_ref" + name: "out" type_attr: "T" is_ref: true } @@ -2050,6 +2378,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -2062,34 +2392,44 @@ op { } } op { - name: "AssignAddVariableOp" + name: "ApplyRMSProp" input_arg { - name: "resource" - type: DT_RESOURCE + name: "var" + type_attr: "T" + is_ref: true } input_arg { - name: "value" - type_attr: "dtype" - } - attr { - name: "dtype" - type: "type" + name: "ms" + type_attr: "T" + is_ref: true } - is_stateful: true -} -op { - name: "AssignSub" input_arg { - name: "ref" + name: "mom" type_attr: "T" is_ref: true } input_arg { - name: "value" + name: "lr" + type_attr: "T" + } + input_arg { + name: "rho" + type_attr: "T" + } + input_arg { + name: "momentum" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } output_arg { - name: "output_ref" + name: "out" type_attr: "T" is_ref: true } @@ -2124,76 +2464,92 @@ op { } } op { - name: "AssignSubVariableOp" + name: "ApplyRMSProp" input_arg { - name: "resource" - type: DT_RESOURCE + name: "var" + type_attr: "T" + 
is_ref: true } input_arg { - name: "value" - type_attr: "dtype" + name: "ms" + type_attr: "T" + is_ref: true } - attr { - name: "dtype" - type: "type" + input_arg { + name: "mom" + type_attr: "T" + is_ref: true } - is_stateful: true -} -op { - name: "AssignVariableOp" input_arg { - name: "resource" - type: DT_RESOURCE + name: "lr" + type_attr: "T" } input_arg { - name: "value" - type_attr: "dtype" + name: "rho" + type_attr: "T" } - attr { - name: "dtype" - type: "type" + input_arg { + name: "momentum" + type_attr: "T" } - is_stateful: true -} -op { - name: "Atan" input_arg { - name: "x" + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } output_arg { - name: "y" + name: "out" type_attr: "T" + is_ref: true } attr { name: "T" type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 type: DT_COMPLEX64 type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "Atan2" + name: "ApproximateEqual" input_arg { - name: "y" + name: "x" type_attr: "T" } input_arg { - name: "x" + name: "y" type_attr: "T" } output_arg { name: "z" - type_attr: "T" + type: DT_BOOL } attr { name: "T" @@ -2202,606 +2558,621 @@ op { list { type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF } } } + attr { + name: "tolerance" + type: "float" + default_value { + f: 1e-05 + } + } + is_commutative: true } op { - name: "Atanh" + name: "ApproximateEqual" input_arg { name: "x" type_attr: "T" } - output_arg { + input_arg { name: "y" type_attr: "T" } + output_arg { + name: "z" + 
type: DT_BOOL + } attr { name: "T" type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 type: DT_COMPLEX64 type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } -} -op { - name: "AudioSpectrogram" - input_arg { - name: "input" - type: DT_FLOAT - } - output_arg { - name: "spectrogram" - type: DT_FLOAT - } - attr { - name: "window_size" - type: "int" - } - attr { - name: "stride" - type: "int" - } attr { - name: "magnitude_squared" - type: "bool" + name: "tolerance" + type: "float" default_value { - b: false + f: 1e-05 } } + is_commutative: true } op { - name: "AudioSummary" + name: "ArgMax" input_arg { - name: "tag" - type: DT_STRING + name: "input" + type_attr: "T" } input_arg { - name: "tensor" - type: DT_FLOAT + name: "dimension" + type_attr: "Tidx" } output_arg { - name: "summary" - type: DT_STRING - } - attr { - name: "sample_rate" - type: "float" + name: "output" + type: DT_INT64 } attr { - name: "max_outputs" - type: "int" - default_value { - i: 3 + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } } - has_minimum: true - minimum: 1 - } - deprecation { - version: 15 - } -} -op { - name: "AudioSummaryV2" - input_arg { - name: "tag" - type: DT_STRING - } - input_arg { - name: "tensor" - type: DT_FLOAT - } - input_arg { - name: "sample_rate" - type: DT_FLOAT - } - output_arg { - name: "summary" - type: DT_STRING } attr { - name: "max_outputs" - type: "int" + name: "Tidx" + type: "type" default_value { - i: 3 + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } } - 
has_minimum: true - minimum: 1 } } op { - name: "AvgPool" + name: "ArgMax" input_arg { - name: "value" + name: "input" type_attr: "T" } + input_arg { + name: "dimension" + type_attr: "Tidx" + } output_arg { name: "output" - type_attr: "T" - } - attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 4 - } - attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 4 + type_attr: "output_type" } attr { - name: "padding" - type: "string" + name: "T" + type: "type" allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF } } } attr { - name: "data_format" - type: "string" + name: "Tidx" + type: "type" default_value { - s: "NHWC" + type: DT_INT32 } allowed_values { list { - s: "NHWC" - s: "NCHW" + type: DT_INT32 + type: DT_INT64 } } } attr { - name: "T" + name: "output_type" type: "type" + default_value { + type: DT_INT64 + } allowed_values { list { - type: DT_FLOAT - type: DT_HALF - type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 } } } } op { - name: "AvgPool" + name: "ArgMax" input_arg { - name: "value" + name: "input" type_attr: "T" } + input_arg { + name: "dimension" + type_attr: "Tidx" + } output_arg { name: "output" - type_attr: "T" - } - attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 4 - } - attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 4 + type_attr: "output_type" } attr { - name: "padding" - type: "string" + name: "T" + type: "type" allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + 
type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "data_format" - type: "string" + name: "Tidx" + type: "type" default_value { - s: "NHWC" + type: DT_INT32 } allowed_values { list { - s: "NHWC" - s: "NCHW" + type: DT_INT32 + type: DT_INT64 } } } attr { - name: "T" + name: "output_type" type: "type" + default_value { + type: DT_INT64 + } allowed_values { list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_HALF + type: DT_INT32 + type: DT_INT64 } } } } op { - name: "AvgPool" + name: "ArgMin" input_arg { - name: "value" + name: "input" type_attr: "T" } + input_arg { + name: "dimension" + type_attr: "Tidx" + } output_arg { name: "output" - type_attr: "T" - } - attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 4 - } - attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 4 + type: DT_INT64 } attr { - name: "padding" - type: "string" + name: "T" + type: "type" allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF } } } attr { - name: "data_format" - type: "string" + name: "Tidx" + type: "type" default_value { - s: "NHWC" - } - allowed_values { - list { - s: "NHWC" - s: "NCHW" - } + type: DT_INT32 } - } - attr { - name: "T" - type: "type" allowed_values { list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 } } } } op { - name: "AvgPool3D" + name: "ArgMin" input_arg { name: "input" type_attr: "T" } + input_arg { + name: "dimension" + type_attr: "Tidx" + } output_arg { name: "output" - type_attr: "T" - } - attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 5 + type_attr: "output_type" } attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 5 + name: "T" + type: "type" + allowed_values { + list { + 
type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } } attr { - name: "padding" - type: "string" + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_INT32 + type: DT_INT64 } } } attr { - name: "T" + name: "output_type" type: "type" + default_value { + type: DT_INT64 + } allowed_values { list { - type: DT_FLOAT - type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 } } } } op { - name: "AvgPool3D" + name: "ArgMin" input_arg { name: "input" type_attr: "T" } + input_arg { + name: "dimension" + type_attr: "Tidx" + } output_arg { name: "output" - type_attr: "T" - } - attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 5 - } - attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 5 + type_attr: "output_type" } attr { - name: "padding" - type: "string" + name: "T" + type: "type" allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "data_format" - type: "string" + name: "Tidx" + type: "type" default_value { - s: "NDHWC" + type: DT_INT32 } allowed_values { list { - s: "NDHWC" - s: "NCDHW" + type: DT_INT32 + type: DT_INT64 } } } attr { - name: "T" + name: "output_type" type: "type" + default_value { + type: DT_INT64 + } allowed_values { list { - type: DT_FLOAT - type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 } } } } op { - name: "AvgPool3DGrad" - input_arg { - name: "orig_input_shape" - type: DT_INT32 - } + name: "AsString" input_arg { - name: "grad" + name: 
"input" type_attr: "T" } output_arg { name: "output" - type_attr: "T" - } - attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 5 - } - attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 5 - } - attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } - } + type: DT_STRING } attr { name: "T" type: "type" allowed_values { list { + type: DT_INT32 + type: DT_INT64 + type: DT_COMPLEX64 type: DT_FLOAT type: DT_DOUBLE + type: DT_BOOL + type: DT_INT8 } } } -} -op { - name: "AvgPool3DGrad" - input_arg { - name: "orig_input_shape" - type: DT_INT32 - } - input_arg { - name: "grad" - type_attr: "T" - } - output_arg { - name: "output" - type_attr: "T" + attr { + name: "precision" + type: "int" + default_value { + i: -1 + } } attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 5 + name: "scientific" + type: "bool" + default_value { + b: false + } } attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 5 + name: "shortest" + type: "bool" + default_value { + b: false + } } attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } + name: "width" + type: "int" + default_value { + i: -1 } } attr { - name: "data_format" + name: "fill" type: "string" default_value { - s: "NDHWC" - } - allowed_values { - list { - s: "NDHWC" - s: "NCDHW" - } + s: "" } } +} +op { + name: "Asin" + input_arg { + name: "x" + type_attr: "T" + } + output_arg { + name: "y" + type_attr: "T" + } attr { name: "T" type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } } op { - name: "AvgPoolGrad" - input_arg { - name: "orig_input_shape" - type: DT_INT32 - } + name: "Asinh" input_arg { - name: "grad" + name: "x" type_attr: "T" } output_arg { - name: "output" + name: "y" type_attr: "T" } attr { - name: "ksize" - type: "list(int)" - 
has_minimum: true - minimum: 4 - } - attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 4 - } - attr { - name: "padding" - type: "string" + name: "T" + type: "type" allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } - attr { - name: "data_format" - type: "string" - default_value { - s: "NHWC" - } - allowed_values { - list { - s: "NHWC" - s: "NCHW" - } - } +} +op { + name: "Assert" + input_arg { + name: "condition" + type: DT_BOOL + } + input_arg { + name: "data" + type_list_attr: "T" } attr { name: "T" - type: "type" - allowed_values { - list { - type: DT_FLOAT - type: DT_HALF - type: DT_DOUBLE - } + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "summarize" + type: "int" + default_value { + i: 3 } } + is_stateful: true } op { - name: "AvgPoolGrad" + name: "Assign" input_arg { - name: "orig_input_shape" - type: DT_INT32 + name: "ref" + type_attr: "T" + is_ref: true } input_arg { - name: "grad" + name: "value" type_attr: "T" } output_arg { - name: "output" + name: "output_ref" type_attr: "T" + is_ref: true } attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 4 - } - attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 4 + name: "T" + type: "type" } attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } + name: "validate_shape" + type: "bool" + default_value { + b: true } } attr { - name: "data_format" - type: "string" + name: "use_locking" + type: "bool" default_value { - s: "NHWC" - } - allowed_values { - list { - s: "NHWC" - s: "NCHW" - } + b: true } } + allows_uninitialized_input: true +} +op { + name: "AssignAdd" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "value" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } attr { name: "T" type: "type" 
@@ -2809,126 +3180,133 @@ op { list { type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "AvgPoolGrad" + name: "AssignAdd" input_arg { - name: "orig_input_shape" - type: DT_INT32 + name: "ref" + type_attr: "T" + is_ref: true } input_arg { - name: "grad" + name: "value" type_attr: "T" } output_arg { - name: "output" + name: "output_ref" type_attr: "T" - } - attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 4 - } - attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 4 - } - attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } - } - } - attr { - name: "data_format" - type: "string" - default_value { - s: "NHWC" - } - allowed_values { - list { - s: "NHWC" - s: "NCHW" - } - } + is_ref: true } attr { name: "T" type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "Barrier" - output_arg { - name: "handle" - type: DT_STRING - is_ref: true - } - attr { - name: "component_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "shapes" - type: "list(shape)" - default_value { - list { - } - } - has_minimum: true - } - attr { - name: "capacity" - type: "int" - default_value { - i: -1 - } + name: "AssignAddVariableOp" + input_arg { + name: "resource" + type: DT_RESOURCE } - attr { - name: 
"container" - type: "string" - default_value { - s: "" - } + input_arg { + name: "value" + type_attr: "dtype" } attr { - name: "shared_name" - type: "string" - default_value { - s: "" - } + name: "dtype" + type: "type" } is_stateful: true } op { - name: "BarrierClose" + name: "AssignSub" input_arg { - name: "handle" - type: DT_STRING + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "value" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" is_ref: true } attr { - name: "cancel_pending_enqueues" + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "use_locking" type: "bool" default_value { b: false @@ -2936,112 +3314,93 @@ op { } } op { - name: "BarrierIncompleteSize" - input_arg { - name: "handle" - type: DT_STRING - is_ref: true - } - output_arg { - name: "size" - type: DT_INT32 - } -} -op { - name: "BarrierInsertMany" + name: "AssignSub" input_arg { - name: "handle" - type: DT_STRING + name: "ref" + type_attr: "T" is_ref: true } input_arg { - name: "keys" - type: DT_STRING + name: "value" + type_attr: "T" } - input_arg { - name: "values" + output_arg { + name: "output_ref" type_attr: "T" + is_ref: true } attr { name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } } attr { - name: "component_index" - type: "int" + name: "use_locking" + type: "bool" + default_value { + b: false + } } } op { - name: "BarrierReadySize" + name: "AssignSubVariableOp" input_arg { - 
name: "handle" - type: DT_STRING - is_ref: true + name: "resource" + type: DT_RESOURCE } - output_arg { - name: "size" - type: DT_INT32 + input_arg { + name: "value" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" } + is_stateful: true } op { - name: "BarrierTakeMany" + name: "AssignVariableOp" input_arg { - name: "handle" - type: DT_STRING - is_ref: true + name: "resource" + type: DT_RESOURCE } input_arg { - name: "num_elements" - type: DT_INT32 - } - output_arg { - name: "indices" - type: DT_INT64 - } - output_arg { - name: "keys" - type: DT_STRING - } - output_arg { - name: "values" - type_list_attr: "component_types" - } - attr { - name: "component_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "allow_small_batch" - type: "bool" - default_value { - b: false - } - } - attr { - name: "wait_for_incomplete" - type: "bool" - default_value { - b: false - } + name: "value" + type_attr: "dtype" } attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + name: "dtype" + type: "type" } + is_stateful: true } op { - name: "BatchCholesky" + name: "Atan" input_arg { - name: "input" + name: "x" type_attr: "T" } output_arg { - name: "output" + name: "y" type_attr: "T" } attr { @@ -3049,27 +3408,29 @@ op { type: "type" allowed_values { list { - type: DT_DOUBLE + type: DT_HALF type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } - deprecation { - version: 13 - } } op { - name: "BatchCholeskyGrad" + name: "Atan2" input_arg { - name: "l" + name: "y" type_attr: "T" } input_arg { - name: "grad" + name: "x" type_attr: "T" } output_arg { - name: "output" + name: "z" type_attr: "T" } attr { @@ -3082,157 +3443,120 @@ op { } } } - deprecation { - version: 13 - } } op { - name: "BatchDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } + name: "Atanh" input_arg { - name: "batch_size" - type: DT_INT64 + name: "x" + type_attr: "T" } output_arg { 
- name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "y" + type_attr: "T" } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } } - is_stateful: true } op { - name: "BatchDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } + name: "AudioSpectrogram" input_arg { - name: "batch_size" - type: DT_INT64 + name: "input" + type: DT_FLOAT } output_arg { - name: "handle" - type: DT_VARIANT + name: "spectrogram" + type: DT_FLOAT } attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "window_size" + type: "int" } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} -op { - name: "BatchFFT" - input_arg { - name: "input" - type: DT_COMPLEX64 - } - output_arg { - name: "output" - type: DT_COMPLEX64 + name: "stride" + type: "int" } - deprecation { - version: 15 + attr { + name: "magnitude_squared" + type: "bool" + default_value { + b: false + } } } op { - name: "BatchFFT2D" + name: "AudioSummary" input_arg { - name: "input" - type: DT_COMPLEX64 - } - output_arg { - name: "output" - type: DT_COMPLEX64 - } - deprecation { - version: 15 + name: "tag" + type: DT_STRING } -} -op { - name: "BatchFFT3D" input_arg { - name: "input" - type: DT_COMPLEX64 + name: "tensor" + type: DT_FLOAT } output_arg { - name: "output" - type: DT_COMPLEX64 - } - deprecation { - version: 15 + name: "summary" + type: DT_STRING } -} -op { - name: "BatchIFFT" - input_arg { - name: "input" - type: DT_COMPLEX64 + attr { + name: "sample_rate" + type: "float" } - output_arg { - name: "output" - type: DT_COMPLEX64 + attr { + name: "max_outputs" + type: "int" + default_value { + i: 3 + } + has_minimum: true + minimum: 1 } deprecation { version: 15 } } op 
{ - name: "BatchIFFT2D" + name: "AudioSummaryV2" input_arg { - name: "input" - type: DT_COMPLEX64 - } - output_arg { - name: "output" - type: DT_COMPLEX64 + name: "tag" + type: DT_STRING } - deprecation { - version: 15 + input_arg { + name: "tensor" + type: DT_FLOAT } -} -op { - name: "BatchIFFT3D" input_arg { - name: "input" - type: DT_COMPLEX64 + name: "sample_rate" + type: DT_FLOAT } output_arg { - name: "output" - type: DT_COMPLEX64 + name: "summary" + type: DT_STRING } - deprecation { - version: 15 + attr { + name: "max_outputs" + type: "int" + default_value { + i: 3 + } + has_minimum: true + minimum: 1 } } op { - name: "BatchMatMul" - input_arg { - name: "x" - type_attr: "T" - } + name: "AvgPool" input_arg { - name: "y" + name: "value" type_attr: "T" } output_arg { @@ -3240,88 +3564,56 @@ op { type_attr: "T" } attr { - name: "T" - type: "type" + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" allowed_values { list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT32 - type: DT_COMPLEX64 - type: DT_COMPLEX128 + s: "SAME" + s: "VALID" } } } attr { - name: "adj_x" - type: "bool" + name: "data_format" + type: "string" default_value { - b: false + s: "NHWC" } - } - attr { - name: "adj_y" - type: "bool" - default_value { - b: false + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } } } -} -op { - name: "BatchMatrixBandPart" - input_arg { - name: "input" - type_attr: "T" - } - input_arg { - name: "num_lower" - type: DT_INT64 - } - input_arg { - name: "num_upper" - type: DT_INT64 - } - output_arg { - name: "band" - type_attr: "T" - } - attr { - name: "T" - type: "type" - } - deprecation { - version: 14 - } -} -op { - name: "BatchMatrixDeterminant" - input_arg { - name: "input" - type_attr: "T" - } - output_arg { - name: "output" - type_attr: "T" - } attr { name: "T" type: "type" allowed_values { 
list { type: DT_FLOAT + type: DT_HALF type: DT_DOUBLE } } } - deprecation { - version: 13 - } } op { - name: "BatchMatrixDeterminant" + name: "AvgPool" input_arg { - name: "input" + name: "value" type_attr: "T" } output_arg { @@ -3329,72 +3621,95 @@ op { type_attr: "T" } attr { - name: "T" - type: "type" + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" allowed_values { list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 + s: "SAME" + s: "VALID" } } } - deprecation { - version: 13 - } -} -op { - name: "BatchMatrixDiag" - input_arg { - name: "diagonal" - type_attr: "T" - } - output_arg { - name: "output" - type_attr: "T" + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } + } } attr { name: "T" type: "type" - } - deprecation { - version: 14 + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_HALF + } + } } } op { - name: "BatchMatrixDiagPart" + name: "AvgPool" input_arg { - name: "input" + name: "value" type_attr: "T" } output_arg { - name: "diagonal" + name: "output" type_attr: "T" } attr { - name: "T" - type: "type" - } - deprecation { - version: 14 + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 } -} -op { - name: "BatchMatrixInverse" - input_arg { - name: "input" - type_attr: "T" + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 } - output_arg { - name: "output" - type_attr: "T" + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } } attr { - name: "adjoint" - type: "bool" + name: "data_format" + type: "string" default_value { - b: false + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } } } attr { @@ -3402,119 +3717,60 @@ op { type: "type" allowed_values 
{ list { - type: DT_DOUBLE + type: DT_HALF type: DT_FLOAT + type: DT_DOUBLE } } } - deprecation { - version: 13 - } } op { - name: "BatchMatrixSetDiag" + name: "AvgPool3D" input_arg { name: "input" type_attr: "T" } - input_arg { - name: "diagonal" - type_attr: "T" - } output_arg { name: "output" type_attr: "T" } attr { - name: "T" - type: "type" - } - deprecation { - version: 14 - } -} -op { - name: "BatchMatrixSolve" - input_arg { - name: "matrix" - type_attr: "T" - } - input_arg { - name: "rhs" - type_attr: "T" - } - output_arg { - name: "output" - type_attr: "T" + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 5 } attr { - name: "adjoint" - type: "bool" - default_value { - b: false - } + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 5 } attr { - name: "T" - type: "type" + name: "padding" + type: "string" allowed_values { list { - type: DT_DOUBLE - type: DT_FLOAT + s: "SAME" + s: "VALID" } } } - deprecation { - version: 13 - } -} -op { - name: "BatchMatrixSolveLs" - input_arg { - name: "matrix" - type_attr: "T" - } - input_arg { - name: "rhs" - type_attr: "T" - } - input_arg { - name: "l2_regularizer" - type: DT_DOUBLE - } - output_arg { - name: "output" - type_attr: "T" - } attr { name: "T" type: "type" allowed_values { list { - type: DT_DOUBLE type: DT_FLOAT + type: DT_DOUBLE } } } - attr { - name: "fast" - type: "bool" - default_value { - b: true - } - } - deprecation { - version: 13 - } } op { - name: "BatchMatrixTriangularSolve" - input_arg { - name: "matrix" - type_attr: "T" - } + name: "AvgPool3D" input_arg { - name: "rhs" + name: "input" type_attr: "T" } output_arg { @@ -3522,17 +3778,38 @@ op { type_attr: "T" } attr { - name: "lower" - type: "bool" - default_value { - b: true + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 5 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 5 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + 
s: "VALID" + } } } attr { - name: "adjoint" - type: "bool" + name: "data_format" + type: "string" default_value { - b: false + s: "NDHWC" + } + allowed_values { + list { + s: "NDHWC" + s: "NCDHW" + } } } attr { @@ -3540,40 +3817,47 @@ op { type: "type" allowed_values { list { - type: DT_DOUBLE type: DT_FLOAT + type: DT_DOUBLE } } } - deprecation { - version: 13 - } } op { - name: "BatchNormWithGlobalNormalization" + name: "AvgPool3DGrad" input_arg { - name: "t" - type_attr: "T" + name: "orig_input_shape" + type: DT_INT32 } input_arg { - name: "m" + name: "grad" type_attr: "T" } - input_arg { - name: "v" + output_arg { + name: "output" type_attr: "T" } - input_arg { - name: "beta" - type_attr: "T" + attr { + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 5 } - input_arg { - name: "gamma" - type_attr: "T" + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 5 } - output_arg { - name: "result" - type_attr: "T" + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } } attr { name: "T" @@ -3582,74 +3866,58 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT16 - type: DT_INT8 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 - type: DT_HALF } } } - attr { - name: "variance_epsilon" - type: "float" - } - attr { - name: "scale_after_normalization" - type: "bool" - } - deprecation { - version: 9 - } } op { - name: "BatchNormWithGlobalNormalizationGrad" - input_arg { - name: "t" - type_attr: "T" - } - input_arg { - name: "m" - type_attr: "T" - } - input_arg { - name: "v" - type_attr: "T" - } + name: "AvgPool3DGrad" input_arg { - name: "gamma" - type_attr: "T" + name: "orig_input_shape" + type: DT_INT32 } input_arg { - name: "backprop" + name: "grad" type_attr: "T" } output_arg { - name: "dx" + name: "output" type_attr: "T" } - output_arg { - name: "dm" - 
type_attr: "T" + attr { + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 5 } - output_arg { - name: "dv" - type_attr: "T" + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 5 } - output_arg { - name: "db" - type_attr: "T" + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } } - output_arg { - name: "dg" - type_attr: "T" + attr { + name: "data_format" + type: "string" + default_value { + s: "NDHWC" + } + allowed_values { + list { + s: "NDHWC" + s: "NCDHW" + } + } } attr { name: "T" @@ -3658,37 +3926,18 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT16 - type: DT_INT8 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 - type: DT_HALF } } } - attr { - name: "variance_epsilon" - type: "float" - } - attr { - name: "scale_after_normalization" - type: "bool" - } - deprecation { - version: 9 - } } op { - name: "BatchSelfAdjointEig" + name: "AvgPoolGrad" input_arg { - name: "input" + name: "orig_input_shape" + type: DT_INT32 + } + input_arg { + name: "grad" type_attr: "T" } output_arg { @@ -3696,38 +3945,38 @@ op { type_attr: "T" } attr { - name: "T" - type: "type" + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" allowed_values { list { - type: DT_DOUBLE - type: DT_FLOAT + s: "SAME" + s: "VALID" } } } - deprecation { - version: 11 - } -} -op { - name: "BatchSelfAdjointEigV2" - input_arg { - name: "input" - type_attr: "T" - } - output_arg { - name: "e" - type_attr: "T" - } - output_arg { - name: "v" - type_attr: "T" - } attr { - name: "compute_v" - type: "bool" + name: "data_format" + type: "string" default_value { - b: true + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } } } attr { 
@@ -3735,45 +3984,60 @@ op { type: "type" allowed_values { list { - type: DT_DOUBLE type: DT_FLOAT + type: DT_HALF + type: DT_DOUBLE } } } - deprecation { - version: 13 - } } op { - name: "BatchSvd" + name: "AvgPoolGrad" input_arg { - name: "input" - type_attr: "T" + name: "orig_input_shape" + type: DT_INT32 } - output_arg { - name: "s" + input_arg { + name: "grad" type_attr: "T" } output_arg { - name: "u" + name: "output" type_attr: "T" } - output_arg { - name: "v" - type_attr: "T" + attr { + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 } attr { - name: "compute_uv" - type: "bool" - default_value { - b: true + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } } } attr { - name: "full_matrices" - type: "bool" + name: "data_format" + type: "string" default_value { - b: false + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } } } attr { @@ -3781,71 +4045,159 @@ op { type: "type" allowed_values { list { - type: DT_DOUBLE type: DT_FLOAT - type: DT_COMPLEX64 - type: DT_COMPLEX128 + type: DT_DOUBLE + type: DT_HALF } } } - deprecation { - version: 13 - } } op { - name: "BatchToSpace" + name: "AvgPoolGrad" input_arg { - name: "input" - type_attr: "T" + name: "orig_input_shape" + type: DT_INT32 } input_arg { - name: "crops" - type_attr: "Tidx" + name: "grad" + type_attr: "T" } output_arg { name: "output" type_attr: "T" } attr { - name: "T" - type: "type" - } - attr { - name: "block_size" - type: "int" + name: "ksize" + type: "list(int)" has_minimum: true - minimum: 2 + minimum: 4 } attr { - name: "Tidx" - type: "type" + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" default_value { - type: DT_INT32 + s: "NHWC" } allowed_values { 
list { - type: DT_INT32 - type: DT_INT64 + s: "NHWC" + s: "NCHW" + } + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE } } } } op { - name: "BatchToSpaceND" - input_arg { - name: "input" - type_attr: "T" + name: "Barrier" + output_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + attr { + name: "component_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "shapes" + type: "list(shape)" + default_value { + list { + } + } + has_minimum: true + } + attr { + name: "capacity" + type: "int" + default_value { + i: -1 + } + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } } + is_stateful: true +} +op { + name: "BarrierClose" input_arg { - name: "block_shape" - type_attr: "Tblock_shape" + name: "handle" + type: DT_STRING + is_ref: true + } + attr { + name: "cancel_pending_enqueues" + type: "bool" + default_value { + b: false + } } +} +op { + name: "BarrierIncompleteSize" input_arg { - name: "crops" - type_attr: "Tcrops" + name: "handle" + type: DT_STRING + is_ref: true } output_arg { - name: "output" + name: "size" + type: DT_INT32 + } +} +op { + name: "BarrierInsertMany" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + input_arg { + name: "keys" + type: DT_STRING + } + input_arg { + name: "values" type_attr: "T" } attr { @@ -3853,48 +4205,81 @@ op { type: "type" } attr { - name: "Tblock_shape" - type: "type" + name: "component_index" + type: "int" + } +} +op { + name: "BarrierReadySize" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + output_arg { + name: "size" + type: DT_INT32 + } +} +op { + name: "BarrierTakeMany" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + input_arg { + name: "num_elements" + type: DT_INT32 + } + output_arg { + name: "indices" + type: DT_INT64 + } + output_arg { 
+ name: "keys" + type: DT_STRING + } + output_arg { + name: "values" + type_list_attr: "component_types" + } + attr { + name: "component_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "allow_small_batch" + type: "bool" default_value { - type: DT_INT32 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } + b: false } } attr { - name: "Tcrops" - type: "type" + name: "wait_for_incomplete" + type: "bool" default_value { - type: DT_INT32 + b: false } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } + } + attr { + name: "timeout_ms" + type: "int" + default_value { + i: -1 } } } op { - name: "Betainc" - input_arg { - name: "a" - type_attr: "T" - } - input_arg { - name: "b" - type_attr: "T" - } + name: "BatchCholesky" input_arg { - name: "x" + name: "input" type_attr: "T" } output_arg { - name: "z" + name: "output" type_attr: "T" } attr { @@ -3902,20 +4287,23 @@ op { type: "type" allowed_values { list { - type: DT_FLOAT type: DT_DOUBLE + type: DT_FLOAT } } } + deprecation { + version: 13 + } } op { - name: "BiasAdd" + name: "BatchCholeskyGrad" input_arg { - name: "value" + name: "l" type_attr: "T" } input_arg { - name: "bias" + name: "grad" type_attr: "T" } output_arg { @@ -3929,39 +4317,160 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT16 - type: DT_INT8 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 - type: DT_HALF } } } + deprecation { + version: 13 + } +} +op { + name: "BatchDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "batch_size" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } attr { - name: "data_format" - type: "string" - default_value { - s: "NHWC" - } - allowed_values { - list { - s: "NHWC" - s: "NCHW" - } - } + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + 
attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 } + is_stateful: true } op { - name: "BiasAddGrad" + name: "BatchDataset" input_arg { - name: "out_backprop" + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "batch_size" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} +op { + name: "BatchFFT" + input_arg { + name: "input" + type: DT_COMPLEX64 + } + output_arg { + name: "output" + type: DT_COMPLEX64 + } + deprecation { + version: 15 + } +} +op { + name: "BatchFFT2D" + input_arg { + name: "input" + type: DT_COMPLEX64 + } + output_arg { + name: "output" + type: DT_COMPLEX64 + } + deprecation { + version: 15 + } +} +op { + name: "BatchFFT3D" + input_arg { + name: "input" + type: DT_COMPLEX64 + } + output_arg { + name: "output" + type: DT_COMPLEX64 + } + deprecation { + version: 15 + } +} +op { + name: "BatchIFFT" + input_arg { + name: "input" + type: DT_COMPLEX64 + } + output_arg { + name: "output" + type: DT_COMPLEX64 + } + deprecation { + version: 15 + } +} +op { + name: "BatchIFFT2D" + input_arg { + name: "input" + type: DT_COMPLEX64 + } + output_arg { + name: "output" + type: DT_COMPLEX64 + } + deprecation { + version: 15 + } +} +op { + name: "BatchIFFT3D" + input_arg { + name: "input" + type: DT_COMPLEX64 + } + output_arg { + name: "output" + type: DT_COMPLEX64 + } + deprecation { + version: 15 + } +} +op { + name: "BatchMatMul" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" type_attr: "T" } output_arg { @@ -3973,90 +4482,64 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_INT64 type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT16 - type: DT_INT8 type: DT_COMPLEX64 type: DT_COMPLEX128 - type: DT_QINT8 - 
type: DT_QUINT8 - type: DT_QINT32 - type: DT_HALF } } } attr { - name: "data_format" - type: "string" + name: "adj_x" + type: "bool" default_value { - s: "NHWC" + b: false } - allowed_values { - list { - s: "NHWC" - s: "NCHW" - } + } + attr { + name: "adj_y" + type: "bool" + default_value { + b: false } } } op { - name: "BiasAddV1" + name: "BatchMatrixBandPart" input_arg { - name: "value" + name: "input" type_attr: "T" } input_arg { - name: "bias" - type_attr: "T" + name: "num_lower" + type: DT_INT64 + } + input_arg { + name: "num_upper" + type: DT_INT64 } output_arg { - name: "output" + name: "band" type_attr: "T" } attr { name: "T" type: "type" - allowed_values { - list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT16 - type: DT_INT8 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 - type: DT_HALF - } - } + } + deprecation { + version: 14 } } op { - name: "Bincount" - input_arg { - name: "arr" - type: DT_INT32 - } - input_arg { - name: "size" - type: DT_INT32 - } + name: "BatchMatrixDeterminant" input_arg { - name: "weights" + name: "input" type_attr: "T" } output_arg { - name: "bins" + name: "output" type_attr: "T" } attr { @@ -4064,23 +4547,24 @@ op { type: "type" allowed_values { list { - type: DT_INT32 - type: DT_INT64 type: DT_FLOAT type: DT_DOUBLE } } } + deprecation { + version: 13 + } } op { - name: "Bitcast" + name: "BatchMatrixDeterminant" input_arg { name: "input" type_attr: "T" } output_arg { name: "output" - type_attr: "type" + type_attr: "T" } attr { name: "T" @@ -4089,175 +4573,155 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT16 - type: DT_INT8 type: DT_COMPLEX64 type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 - type: DT_HALF } } } + deprecation { + version: 13 + } +} +op { + name: "BatchMatrixDiag" + input_arg { + name: 
"diagonal" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } attr { - name: "type" + name: "T" type: "type" - allowed_values { - list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT16 - type: DT_INT8 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 - type: DT_HALF - } - } + } + deprecation { + version: 14 } } op { - name: "Bitcast" + name: "BatchMatrixDiagPart" input_arg { name: "input" type_attr: "T" } output_arg { - name: "output" - type_attr: "type" + name: "diagonal" + type_attr: "T" } attr { name: "T" type: "type" - allowed_values { - list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT8 - type: DT_INT16 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 - type: DT_HALF - } + } + deprecation { + version: 14 + } +} +op { + name: "BatchMatrixInverse" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "adjoint" + type: "bool" + default_value { + b: false } } attr { - name: "type" + name: "T" type: "type" allowed_values { list { - type: DT_FLOAT type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT8 - type: DT_INT16 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 - type: DT_HALF + type: DT_FLOAT } } } + deprecation { + version: 13 + } } op { - name: "BitwiseAnd" + name: "BatchMatrixSetDiag" input_arg { - name: "x" + name: "input" type_attr: "T" } input_arg { - name: "y" + name: "diagonal" type_attr: "T" } output_arg { - name: "z" + name: "output" type_attr: "T" } attr { name: "T" type: "type" - allowed_values { - list { - type: DT_INT8 - type: DT_INT16 - type: DT_INT32 - 
type: DT_INT64 - type: DT_UINT8 - type: DT_UINT16 - } - } } - is_commutative: true + deprecation { + version: 14 + } } op { - name: "BitwiseOr" + name: "BatchMatrixSolve" input_arg { - name: "x" + name: "matrix" type_attr: "T" } input_arg { - name: "y" + name: "rhs" type_attr: "T" } output_arg { - name: "z" + name: "output" type_attr: "T" } + attr { + name: "adjoint" + type: "bool" + default_value { + b: false + } + } attr { name: "T" type: "type" allowed_values { list { - type: DT_INT8 - type: DT_INT16 - type: DT_INT32 - type: DT_INT64 - type: DT_UINT8 - type: DT_UINT16 + type: DT_DOUBLE + type: DT_FLOAT } } } - is_commutative: true + deprecation { + version: 13 + } } op { - name: "BitwiseXor" + name: "BatchMatrixSolveLs" input_arg { - name: "x" + name: "matrix" type_attr: "T" } input_arg { - name: "y" + name: "rhs" type_attr: "T" } + input_arg { + name: "l2_regularizer" + type: DT_DOUBLE + } output_arg { - name: "z" + name: "output" type_attr: "T" } attr { @@ -4265,358 +4729,575 @@ op { type: "type" allowed_values { list { - type: DT_INT8 - type: DT_INT16 - type: DT_INT32 - type: DT_INT64 - type: DT_UINT8 - type: DT_UINT16 + type: DT_DOUBLE + type: DT_FLOAT } } } - is_commutative: true + attr { + name: "fast" + type: "bool" + default_value { + b: true + } + } + deprecation { + version: 13 + } } op { - name: "BroadcastArgs" + name: "BatchMatrixTriangularSolve" input_arg { - name: "s0" + name: "matrix" type_attr: "T" } input_arg { - name: "s1" + name: "rhs" type_attr: "T" } output_arg { - name: "r0" + name: "output" type_attr: "T" } attr { - name: "T" - type: "type" + name: "lower" + type: "bool" default_value { - type: DT_INT32 + b: true + } + } + attr { + name: "adjoint" + type: "bool" + default_value { + b: false } + } + attr { + name: "T" + type: "type" allowed_values { list { - type: DT_INT32 - type: DT_INT64 + type: DT_DOUBLE + type: DT_FLOAT } } } + deprecation { + version: 13 + } } op { - name: "BroadcastGradientArgs" + name: 
"BatchNormWithGlobalNormalization" input_arg { - name: "s0" + name: "t" type_attr: "T" } input_arg { - name: "s1" + name: "m" type_attr: "T" } - output_arg { - name: "r0" + input_arg { + name: "v" + type_attr: "T" + } + input_arg { + name: "beta" + type_attr: "T" + } + input_arg { + name: "gamma" type_attr: "T" } output_arg { - name: "r1" + name: "result" type_attr: "T" } attr { name: "T" type: "type" - default_value { - type: DT_INT32 - } allowed_values { list { - type: DT_INT32 + type: DT_FLOAT + type: DT_DOUBLE type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF } } } + attr { + name: "variance_epsilon" + type: "float" + } + attr { + name: "scale_after_normalization" + type: "bool" + } + deprecation { + version: 9 + } } op { - name: "Bucketize" + name: "BatchNormWithGlobalNormalization" input_arg { - name: "input" + name: "t" + type_attr: "T" + } + input_arg { + name: "m" + type_attr: "T" + } + input_arg { + name: "v" + type_attr: "T" + } + input_arg { + name: "beta" + type_attr: "T" + } + input_arg { + name: "gamma" type_attr: "T" } output_arg { - name: "output" - type: DT_INT32 + name: "result" + type_attr: "T" } attr { name: "T" type: "type" allowed_values { list { - type: DT_INT32 - type: DT_INT64 type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "boundaries" - type: "list(float)" + name: "variance_epsilon" + type: "float" + } + attr { + name: "scale_after_normalization" + type: "bool" + } + deprecation { + version: 9 } } op { - name: "CTCBeamSearchDecoder" + name: "BatchNormWithGlobalNormalizationGrad" input_arg { - name: "inputs" - type: DT_FLOAT + 
name: "t" + type_attr: "T" } input_arg { - name: "sequence_length" - type: DT_INT32 + name: "m" + type_attr: "T" + } + input_arg { + name: "v" + type_attr: "T" + } + input_arg { + name: "gamma" + type_attr: "T" + } + input_arg { + name: "backprop" + type_attr: "T" } output_arg { - name: "decoded_indices" - type: DT_INT64 - number_attr: "top_paths" + name: "dx" + type_attr: "T" } output_arg { - name: "decoded_values" - type: DT_INT64 - number_attr: "top_paths" + name: "dm" + type_attr: "T" } output_arg { - name: "decoded_shape" - type: DT_INT64 - number_attr: "top_paths" + name: "dv" + type_attr: "T" } output_arg { - name: "log_probability" - type: DT_FLOAT + name: "db" + type_attr: "T" + } + output_arg { + name: "dg" + type_attr: "T" } attr { - name: "beam_width" - type: "int" - has_minimum: true - minimum: 1 + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } } attr { - name: "top_paths" - type: "int" - has_minimum: true - minimum: 1 + name: "variance_epsilon" + type: "float" } attr { - name: "merge_repeated" + name: "scale_after_normalization" type: "bool" - default_value { - b: true - } + } + deprecation { + version: 9 } } op { - name: "CTCGreedyDecoder" + name: "BatchNormWithGlobalNormalizationGrad" input_arg { - name: "inputs" - type: DT_FLOAT + name: "t" + type_attr: "T" } input_arg { - name: "sequence_length" - type: DT_INT32 + name: "m" + type_attr: "T" + } + input_arg { + name: "v" + type_attr: "T" + } + input_arg { + name: "gamma" + type_attr: "T" + } + input_arg { + name: "backprop" + type_attr: "T" } output_arg { - name: "decoded_indices" - type: DT_INT64 + name: "dx" + type_attr: "T" } output_arg { - name: "decoded_values" - type: DT_INT64 + name: "dm" + type_attr: "T" } output_arg { - name: 
"decoded_shape" - type: DT_INT64 + name: "dv" + type_attr: "T" } output_arg { - name: "log_probability" - type: DT_FLOAT + name: "db" + type_attr: "T" + } + output_arg { + name: "dg" + type_attr: "T" } attr { - name: "merge_repeated" - type: "bool" - default_value { - b: false + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } } } + attr { + name: "variance_epsilon" + type: "float" + } + attr { + name: "scale_after_normalization" + type: "bool" + } + deprecation { + version: 9 + } } op { - name: "CTCLoss" + name: "BatchSelfAdjointEig" input_arg { - name: "inputs" - type: DT_FLOAT + name: "input" + type_attr: "T" } - input_arg { - name: "labels_indices" - type: DT_INT64 + output_arg { + name: "output" + type_attr: "T" } - input_arg { - name: "labels_values" - type: DT_INT32 + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_DOUBLE + type: DT_FLOAT + } + } + } + deprecation { + version: 11 } +} +op { + name: "BatchSelfAdjointEigV2" input_arg { - name: "sequence_length" - type: DT_INT32 + name: "input" + type_attr: "T" } output_arg { - name: "loss" - type: DT_FLOAT + name: "e" + type_attr: "T" } output_arg { - name: "gradient" - type: DT_FLOAT + name: "v" + type_attr: "T" } attr { - name: "preprocess_collapse_repeated" + name: "compute_v" type: "bool" default_value { - b: false + b: true } } attr { - name: "ctc_merge_repeated" - type: "bool" - default_value { - b: true + name: "T" + type: "type" + allowed_values { + list { + type: DT_DOUBLE + type: DT_FLOAT + } } } + deprecation { + version: 13 + } } op { - name: "CTCLoss" + name: "BatchSvd" input_arg { - name: "inputs" - type: DT_FLOAT - } - input_arg { - name: "labels_indices" - type: DT_INT64 - } - 
input_arg { - name: "labels_values" - type: DT_INT32 + name: "input" + type_attr: "T" } - input_arg { - name: "sequence_length" - type: DT_INT32 + output_arg { + name: "s" + type_attr: "T" } output_arg { - name: "loss" - type: DT_FLOAT + name: "u" + type_attr: "T" } output_arg { - name: "gradient" - type: DT_FLOAT + name: "v" + type_attr: "T" } attr { - name: "preprocess_collapse_repeated" + name: "compute_uv" type: "bool" default_value { - b: false + b: true } } attr { - name: "ctc_merge_repeated" + name: "full_matrices" type: "bool" default_value { - b: true + b: false } } attr { - name: "ignore_longer_outputs_than_inputs" - type: "bool" - default_value { - b: false + name: "T" + type: "type" + allowed_values { + list { + type: DT_DOUBLE + type: DT_FLOAT + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } } } + deprecation { + version: 13 + } } op { - name: "CacheDataset" + name: "BatchToSpace" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "input" + type_attr: "T" } input_arg { - name: "filename" - type: DT_STRING + name: "crops" + type_attr: "Tidx" } output_arg { - name: "handle" - type: DT_VARIANT + name: "output" + type_attr: "T" } attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "T" + type: "type" } attr { - name: "output_shapes" - type: "list(shape)" + name: "block_size" + type: "int" has_minimum: true - minimum: 1 + minimum: 2 + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } } - is_stateful: true } op { - name: "CacheDataset" + name: "BatchToSpaceND" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "input" + type_attr: "T" } input_arg { - name: "filename" - type: DT_STRING + name: "block_shape" + type_attr: "Tblock_shape" + } + input_arg { + name: "crops" + type_attr: "Tcrops" } output_arg { - name: "handle" - type: DT_VARIANT + name: "output" + type_attr: "T" } attr { - name: 
"output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "T" + type: "type" } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "Tblock_shape" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tcrops" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } } } op { - name: "Cast" + name: "Betainc" + input_arg { + name: "a" + type_attr: "T" + } + input_arg { + name: "b" + type_attr: "T" + } input_arg { name: "x" - type_attr: "SrcT" + type_attr: "T" } output_arg { - name: "y" - type_attr: "DstT" - } - attr { - name: "SrcT" - type: "type" + name: "z" + type_attr: "T" } attr { - name: "DstT" + name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + } + } } } op { - name: "Ceil" + name: "BiasAdd" input_arg { - name: "x" + name: "value" + type_attr: "T" + } + input_arg { + name: "bias" type_attr: "T" } output_arg { - name: "y" + name: "output" type_attr: "T" } attr { @@ -4624,17 +5305,45 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" } } } } op { - name: "CheckNumerics" + name: "BiasAdd" input_arg { - name: "tensor" + name: "value" + type_attr: "T" + } + input_arg { + name: "bias" type_attr: "T" } output_arg { @@ -4646,21 +5355,43 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: 
DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "message" + name: "data_format" type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } + } } } op { - name: "Cholesky" + name: "BiasAddGrad" input_arg { - name: "input" + name: "out_backprop" type_attr: "T" } output_arg { @@ -4672,16 +5403,41 @@ op { type: "type" allowed_values { list { - type: DT_DOUBLE type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" } } } } op { - name: "Cholesky" + name: "BiasAddGrad" input_arg { - name: "input" + name: "out_backprop" type_attr: "T" } output_arg { @@ -4693,22 +5449,47 @@ op { type: "type" allowed_values { list { - type: DT_DOUBLE type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 type: DT_COMPLEX64 type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" } } } } op { - name: "CholeskyGrad" + name: "BiasAddV1" input_arg { - name: "l" + name: "value" type_attr: "T" } input_arg { - name: "grad" + name: "bias" type_attr: "T" } output_arg { @@ -4722,236 +5503,344 @@ op { list { type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: 
DT_QUINT8 + type: DT_QINT32 + type: DT_HALF } } } } op { - name: "CompareAndBitpack" + name: "BiasAddV1" input_arg { - name: "input" + name: "value" type_attr: "T" } input_arg { - name: "threshold" + name: "bias" type_attr: "T" } output_arg { name: "output" - type: DT_UINT8 + type_attr: "T" } attr { name: "T" type: "type" allowed_values { list { - type: DT_BOOL - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_INT8 - type: DT_INT16 - type: DT_INT32 type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "Complex" + name: "Bincount" input_arg { - name: "real" - type_attr: "T" + name: "arr" + type: DT_INT32 } input_arg { - name: "imag" + name: "size" + type: DT_INT32 + } + input_arg { + name: "weights" type_attr: "T" } output_arg { - name: "out" - type_attr: "Tout" + name: "bins" + type_attr: "T" } attr { name: "T" type: "type" - default_value { - type: DT_FLOAT - } allowed_values { list { + type: DT_INT32 + type: DT_INT64 type: DT_FLOAT type: DT_DOUBLE } } } +} +op { + name: "Bitcast" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "type" + } attr { - name: "Tout" + name: "T" type: "type" - default_value { - type: DT_COMPLEX64 + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } } + } + attr { + name: "type" + type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 type: DT_COMPLEX64 type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: 
DT_HALF } } } } op { - name: "ComplexAbs" + name: "Bitcast" input_arg { - name: "x" + name: "input" type_attr: "T" } output_arg { - name: "y" - type_attr: "Tout" + name: "output" + type_attr: "type" } attr { name: "T" type: "type" - default_value { - type: DT_COMPLEX64 - } allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT8 + type: DT_INT16 type: DT_COMPLEX64 type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + type: DT_HALF } } } attr { - name: "Tout" + name: "type" type: "type" - default_value { - type: DT_FLOAT - } allowed_values { list { type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT8 + type: DT_INT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + type: DT_HALF } } } } op { - name: "ComputeAccidentalHits" + name: "BitwiseAnd" input_arg { - name: "true_classes" - type: DT_INT64 + name: "x" + type_attr: "T" } input_arg { - name: "sampled_candidates" - type: DT_INT64 - } - output_arg { - name: "indices" - type: DT_INT32 - } - output_arg { - name: "ids" - type: DT_INT64 + name: "y" + type_attr: "T" } output_arg { - name: "weights" - type: DT_FLOAT - } - attr { - name: "num_true" - type: "int" - } - attr { - name: "seed" - type: "int" - default_value { - i: 0 - } + name: "z" + type_attr: "T" } attr { - name: "seed2" - type: "int" - default_value { - i: 0 + name: "T" + type: "type" + allowed_values { + list { + type: DT_INT8 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_UINT16 + } } } + is_commutative: true } op { - name: "Concat" + name: "BitwiseOr" input_arg { - name: "concat_dim" - type: DT_INT32 + name: "x" + type_attr: "T" } input_arg { - name: "values" + name: "y" type_attr: "T" - number_attr: "N" } output_arg { - name: 
"output" + name: "z" type_attr: "T" } - attr { - name: "N" - type: "int" - has_minimum: true - minimum: 2 - } attr { name: "T" type: "type" + allowed_values { + list { + type: DT_INT8 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_UINT16 + } + } } + is_commutative: true } op { - name: "ConcatOffset" + name: "BitwiseXor" input_arg { - name: "concat_dim" - type: DT_INT32 + name: "x" + type_attr: "T" } input_arg { - name: "shape" - type: DT_INT32 - number_attr: "N" + name: "y" + type_attr: "T" } output_arg { - name: "offset" - type: DT_INT32 - number_attr: "N" + name: "z" + type_attr: "T" } attr { - name: "N" - type: "int" - has_minimum: true - minimum: 2 + name: "T" + type: "type" + allowed_values { + list { + type: DT_INT8 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_UINT16 + } + } } + is_commutative: true } op { - name: "ConcatV2" + name: "BroadcastArgs" input_arg { - name: "values" + name: "s0" type_attr: "T" - number_attr: "N" } input_arg { - name: "axis" - type_attr: "Tidx" + name: "s1" + type_attr: "T" } output_arg { - name: "output" + name: "r0" type_attr: "T" } - attr { - name: "N" - type: "int" - has_minimum: true - minimum: 2 - } attr { name: "T" type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "BroadcastGradientArgs" + input_arg { + name: "s0" + type_attr: "T" + } + input_arg { + name: "s1" + type_attr: "T" + } + output_arg { + name: "r0" + type_attr: "T" + } + output_arg { + name: "r1" + type_attr: "T" } attr { - name: "Tidx" + name: "T" type: "type" default_value { type: DT_INT32 @@ -4965,14 +5854,213 @@ op { } } op { - name: "ConcatenateDataset" + name: "Bucketize" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type: DT_INT32 + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + type: DT_FLOAT + type: 
DT_DOUBLE + } + } + } + attr { + name: "boundaries" + type: "list(float)" + } +} +op { + name: "CTCBeamSearchDecoder" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "sequence_length" + type: DT_INT32 + } + output_arg { + name: "decoded_indices" + type: DT_INT64 + number_attr: "top_paths" + } + output_arg { + name: "decoded_values" + type: DT_INT64 + number_attr: "top_paths" + } + output_arg { + name: "decoded_shape" + type: DT_INT64 + number_attr: "top_paths" + } + output_arg { + name: "log_probability" + type: DT_FLOAT + } + attr { + name: "beam_width" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "top_paths" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "merge_repeated" + type: "bool" + default_value { + b: true + } + } +} +op { + name: "CTCGreedyDecoder" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "sequence_length" + type: DT_INT32 + } + output_arg { + name: "decoded_indices" + type: DT_INT64 + } + output_arg { + name: "decoded_values" + type: DT_INT64 + } + output_arg { + name: "decoded_shape" + type: DT_INT64 + } + output_arg { + name: "log_probability" + type: DT_FLOAT + } + attr { + name: "merge_repeated" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "CTCLoss" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "labels_indices" + type: DT_INT64 + } + input_arg { + name: "labels_values" + type: DT_INT32 + } + input_arg { + name: "sequence_length" + type: DT_INT32 + } + output_arg { + name: "loss" + type: DT_FLOAT + } + output_arg { + name: "gradient" + type: DT_FLOAT + } + attr { + name: "preprocess_collapse_repeated" + type: "bool" + default_value { + b: false + } + } + attr { + name: "ctc_merge_repeated" + type: "bool" + default_value { + b: true + } + } +} +op { + name: "CTCLoss" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "labels_indices" + type: DT_INT64 + } + input_arg { + name: 
"labels_values" + type: DT_INT32 + } + input_arg { + name: "sequence_length" + type: DT_INT32 + } + output_arg { + name: "loss" + type: DT_FLOAT + } + output_arg { + name: "gradient" + type: DT_FLOAT + } + attr { + name: "preprocess_collapse_repeated" + type: "bool" + default_value { + b: false + } + } + attr { + name: "ctc_merge_repeated" + type: "bool" + default_value { + b: true + } + } + attr { + name: "ignore_longer_outputs_than_inputs" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "CacheDataset" input_arg { name: "input_dataset" type: DT_VARIANT } input_arg { - name: "another_dataset" - type: DT_VARIANT + name: "filename" + type: DT_STRING } output_arg { name: "handle" @@ -4993,14 +6081,14 @@ op { is_stateful: true } op { - name: "ConcatenateDataset" + name: "CacheDataset" input_arg { name: "input_dataset" type: DT_VARIANT } input_arg { - name: "another_dataset" - type: DT_VARIANT + name: "filename" + type: DT_STRING } output_arg { name: "handle" @@ -5020,56 +6108,74 @@ op { } } op { - name: "ConditionalAccumulator" + name: "Cast" + input_arg { + name: "x" + type_attr: "SrcT" + } output_arg { - name: "handle" - type: DT_STRING - is_ref: true + name: "y" + type_attr: "DstT" } attr { - name: "dtype" + name: "SrcT" + type: "type" + } + attr { + name: "DstT" + type: "type" + } +} +op { + name: "Ceil" + input_arg { + name: "x" + type_attr: "T" + } + output_arg { + name: "y" + type_attr: "T" + } + attr { + name: "T" type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT16 - type: DT_INT8 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 - type: DT_HALF } } } - attr { - name: "shape" - type: "shape" +} +op { + name: "CheckNumerics" + input_arg { + name: "tensor" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" } attr { - name: "container" - type: "string" - 
default_value { - s: "" + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } } } attr { - name: "shared_name" + name: "message" type: "string" - default_value { - s: "" - } } - is_stateful: true } op { - name: "Conj" + name: "Cholesky" input_arg { name: "input" type_attr: "T" @@ -5081,19 +6187,16 @@ op { attr { name: "T" type: "type" - default_value { - type: DT_COMPLEX64 - } allowed_values { list { - type: DT_COMPLEX64 - type: DT_COMPLEX128 + type: DT_DOUBLE + type: DT_FLOAT } } } } op { - name: "Conj" + name: "Cholesky" input_arg { name: "input" type_attr: "T" @@ -5105,130 +6208,595 @@ op { attr { name: "T" type: "type" - default_value { - type: DT_COMPLEX64 - } allowed_values { list { + type: DT_DOUBLE + type: DT_FLOAT type: DT_COMPLEX64 type: DT_COMPLEX128 - type: DT_VARIANT } } } } op { - name: "Const" + name: "CholeskyGrad" + input_arg { + name: "l" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } output_arg { name: "output" - type_attr: "dtype" - } - attr { - name: "value" - type: "tensor" + type_attr: "T" } attr { - name: "dtype" + name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + } + } } } op { - name: "ControlTrigger" -} -op { - name: "Conv2D" + name: "CompareAndBitpack" input_arg { name: "input" type_attr: "T" } input_arg { - name: "filter" + name: "threshold" type_attr: "T" } output_arg { name: "output" - type_attr: "T" + type: DT_UINT8 } attr { name: "T" type: "type" allowed_values { list { + type: DT_BOOL type: DT_HALF type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT8 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 } } } - attr { - name: "strides" - type: "list(int)" +} +op { + name: "Complex" + input_arg { + name: "real" + type_attr: "T" + } + input_arg { + name: "imag" + type_attr: "T" + } + output_arg { + name: "out" + type_attr: "Tout" } attr { - name: "use_cudnn_on_gpu" - type: "bool" + name: "T" + type: "type" default_value { - 
b: true + type: DT_FLOAT } - } - attr { - name: "padding" - type: "string" allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_FLOAT + type: DT_DOUBLE } } } attr { - name: "data_format" - type: "string" + name: "Tout" + type: "type" default_value { - s: "NHWC" + type: DT_COMPLEX64 } allowed_values { list { - s: "NHWC" - s: "NCHW" + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } } op { - name: "Conv2DBackpropFilter" - input_arg { - name: "input" - type_attr: "T" - } - input_arg { - name: "filter_sizes" - type: DT_INT32 - } + name: "ComplexAbs" input_arg { - name: "out_backprop" + name: "x" type_attr: "T" } output_arg { - name: "output" - type_attr: "T" + name: "y" + type_attr: "Tout" } attr { name: "T" type: "type" + default_value { + type: DT_COMPLEX64 + } allowed_values { list { - type: DT_HALF - type: DT_FLOAT + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } attr { - name: "strides" - type: "list(int)" - } - attr { - name: "use_cudnn_on_gpu" - type: "bool" + name: "Tout" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} +op { + name: "ComputeAccidentalHits" + input_arg { + name: "true_classes" + type: DT_INT64 + } + input_arg { + name: "sampled_candidates" + type: DT_INT64 + } + output_arg { + name: "indices" + type: DT_INT32 + } + output_arg { + name: "ids" + type: DT_INT64 + } + output_arg { + name: "weights" + type: DT_FLOAT + } + attr { + name: "num_true" + type: "int" + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } +} +op { + name: "Concat" + input_arg { + name: "concat_dim" + type: DT_INT32 + } + input_arg { + name: "values" + type_attr: "T" + number_attr: "N" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 2 + } + attr { + name: "T" + type: "type" + } +} +op { + name: "ConcatOffset" + input_arg { 
+ name: "concat_dim" + type: DT_INT32 + } + input_arg { + name: "shape" + type: DT_INT32 + number_attr: "N" + } + output_arg { + name: "offset" + type: DT_INT32 + number_attr: "N" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 2 + } +} +op { + name: "ConcatV2" + input_arg { + name: "values" + type_attr: "T" + number_attr: "N" + } + input_arg { + name: "axis" + type_attr: "Tidx" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 2 + } + attr { + name: "T" + type: "type" + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "ConcatenateDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "another_dataset" + type: DT_VARIANT + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "ConcatenateDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "another_dataset" + type: DT_VARIANT + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} +op { + name: "ConditionalAccumulator" + output_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: 
"shape" + type: "shape" + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} +op { + name: "ConditionalAccumulator" + output_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "shape" + type: "shape" + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} +op { + name: "Conj" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + default_value { + type: DT_COMPLEX64 + } + allowed_values { + list { + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } +} +op { + name: "Conj" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + default_value { + type: DT_COMPLEX64 + } + allowed_values { + list { + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_VARIANT + } + } + } +} +op { + name: "Const" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "value" + type: "tensor" + } + attr { + name: "dtype" + type: "type" + } +} +op { + name: "ControlTrigger" +} +op { + name: "Conv2D" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "filter" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + 
} + } + } + attr { + name: "strides" + type: "list(int)" + } + attr { + name: "use_cudnn_on_gpu" + type: "bool" + default_value { + b: true + } + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } + } + } +} +op { + name: "Conv2DBackpropFilter" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "filter_sizes" + type: DT_INT32 + } + input_arg { + name: "out_backprop" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + } + } + } + attr { + name: "strides" + type: "list(int)" + } + attr { + name: "use_cudnn_on_gpu" + type: "bool" default_value { b: true } @@ -6086,6 +7654,40 @@ op { } } } +op { + name: "Cross" + input_arg { + name: "a" + type_attr: "T" + } + input_arg { + name: "b" + type_attr: "T" + } + output_arg { + name: "product" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "Cumprod" input_arg { @@ -6151,7 +7753,7 @@ op { } } op { - name: "Cumsum" + name: "Cumprod" input_arg { name: "x" type_attr: "T" @@ -6197,6 +7799,138 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "Cumsum" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "axis" + type_attr: "Tidx" + } + output_arg { + name: "out" + type_attr: "T" + } + attr { + name: "exclusive" + 
type: "bool" + default_value { + b: false + } + } + attr { + name: "reverse" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "Cumsum" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "axis" + type_attr: "Tidx" + } + output_arg { + name: "out" + type_attr: "T" + } + attr { + name: "exclusive" + type: "bool" + default_value { + b: false + } + } + attr { + name: "reverse" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -7881,6 +9615,62 @@ op { } } } +op { + name: "Dilation2D" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "filter" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "rates" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" + 
allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } +} op { name: "Dilation2DBackpropFilter" input_arg { @@ -7939,6 +9729,124 @@ op { } } } +op { + name: "Dilation2DBackpropFilter" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "filter" + type_attr: "T" + } + input_arg { + name: "out_backprop" + type_attr: "T" + } + output_arg { + name: "filter_backprop" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "rates" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } +} +op { + name: "Dilation2DBackpropInput" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "filter" + type_attr: "T" + } + input_arg { + name: "out_backprop" + type_attr: "T" + } + output_arg { + name: "in_backprop" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "rates" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } +} op { name: "Dilation2DBackpropInput" input_arg { @@ -7971,6 +9879,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -8708,6 +10618,64 @@ op { } } } +op { + name: "ExtractImagePatches" + input_arg { + name: "images" + 
type_attr: "T" + } + output_arg { + name: "patches" + type_attr: "T" + } + attr { + name: "ksizes" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "rates" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } +} op { name: "ExtractJpegShape" input_arg { @@ -11078,6 +13046,40 @@ op { } } } +op { + name: "Greater" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "z" + type: DT_BOOL + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "GreaterEqual" input_arg { @@ -11110,6 +13112,40 @@ op { } } } +op { + name: "GreaterEqual" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "z" + type: DT_BOOL + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "GroupByWindowDataset" input_arg { @@ -11369,6 +13405,43 @@ op { } } } +op { + name: "HistogramSummary" + input_arg { + name: "tag" + type: DT_STRING + } + input_arg { + name: "values" + type_attr: "T" + } + output_arg { + name: "summary" + type: DT_STRING + } + 
attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "IFFT" input_arg { @@ -12722,6 +14795,72 @@ op { } } } +op { + name: "Less" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "z" + type: DT_BOOL + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} +op { + name: "LessEqual" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "z" + type: DT_BOOL + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } + } +} op { name: "LessEqual" input_arg { @@ -12750,6 +14889,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -14278,6 +16419,65 @@ op { } } } +op { + name: "Max" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "reduction_indices" + type_attr: "Tidx" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "keep_dims" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: 
DT_UINT64 + } + } + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "MaxPool" input_arg { @@ -14964,7 +17164,7 @@ op { } } op { - name: "MaxPoolGradGrad" + name: "MaxPoolGrad" input_arg { name: "orig_input" type_attr: "T" @@ -15019,6 +17219,9 @@ op { attr { name: "T" type: "type" + default_value { + type: DT_FLOAT + } allowed_values { list { type: DT_FLOAT @@ -15030,12 +17233,14 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "MaxPoolGradGradV2" + name: "MaxPoolGradGrad" input_arg { name: "orig_input" type_attr: "T" @@ -15048,18 +17253,22 @@ op { name: "grad" type_attr: "T" } - input_arg { - name: "ksize" - type: DT_INT32 - } - input_arg { - name: "strides" - type: DT_INT32 - } output_arg { name: "output" type_attr: "T" } + attr { + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } attr { name: "padding" type: "string" @@ -15102,18 +17311,18 @@ op { } } op { - name: "MaxPoolGradGradWithArgmax" + name: "MaxPoolGradGrad" input_arg { - name: "input" + name: "orig_input" type_attr: "T" } input_arg { - name: "grad" + name: "orig_output" type_attr: "T" } input_arg { - name: "argmax" - type_attr: "Targmax" + name: "grad" + type_attr: "T" } output_arg { name: "output" @@ -15142,12 +17351,84 @@ op { } } attr { - name: "Targmax" + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } + } + } + attr { + name: "T" type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE type: DT_INT32 type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} +op { + name: "MaxPoolGradGradV2" + input_arg { + name: "orig_input" + 
type_attr: "T" + } + input_arg { + name: "orig_output" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "ksize" + type: DT_INT32 + } + input_arg { + name: "strides" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" } } } @@ -15170,7 +17451,7 @@ op { } } op { - name: "MaxPoolGradV2" + name: "MaxPoolGradGradV2" input_arg { name: "orig_input" type_attr: "T" @@ -15221,9 +17502,6 @@ op { attr { name: "T" type: "type" - default_value { - type: DT_FLOAT - } allowed_values { list { type: DT_FLOAT @@ -15235,12 +17513,14 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "MaxPoolGradWithArgmax" + name: "MaxPoolGradGradWithArgmax" input_arg { name: "input" type_attr: "T" @@ -15292,19 +17572,23 @@ op { attr { name: "T" type: "type" - default_value { - type: DT_FLOAT - } allowed_values { list { type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 type: DT_HALF } } } } op { - name: "MaxPoolGradWithArgmax" + name: "MaxPoolGradGradWithArgmax" input_arg { name: "input" type_attr: "T" @@ -15367,14 +17651,24 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "MaxPoolV2" + name: "MaxPoolGradV2" input_arg { - name: "input" + name: "orig_input" + type_attr: "T" + } + input_arg { + name: "orig_output" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } input_arg { @@ -15389,26 +17683,6 @@ op { name: "output" type_attr: "T" } - attr { - name: "T" - type: "type" - default_value { - type: DT_FLOAT - } - allowed_values { - list { - type: DT_FLOAT - type: DT_DOUBLE - type: 
DT_INT32 - type: DT_INT64 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 - type: DT_UINT16 - type: DT_HALF - } - } - } attr { name: "padding" type: "string" @@ -15432,25 +17706,6 @@ op { } } } -} -op { - name: "MaxPoolV2" - input_arg { - name: "input" - type_attr: "T" - } - input_arg { - name: "ksize" - type: DT_INT32 - } - input_arg { - name: "strides" - type: DT_INT32 - } - output_arg { - name: "output" - type_attr: "T" - } attr { name: "T" type: "type" @@ -15468,10 +17723,36 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF - type: DT_QINT8 } } } +} +op { + name: "MaxPoolGradV2" + input_arg { + name: "orig_input" + type_attr: "T" + } + input_arg { + name: "orig_output" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "ksize" + type: DT_INT32 + } + input_arg { + name: "strides" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } attr { name: "padding" type: "string" @@ -15492,25 +17773,50 @@ op { list { s: "NHWC" s: "NCHW" - s: "NCHW_VECT_C" + } + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "MaxPoolWithArgmax" + name: "MaxPoolGradWithArgmax" input_arg { name: "input" type_attr: "T" } - output_arg { - name: "output" + input_arg { + name: "grad" type_attr: "T" } - output_arg { + input_arg { name: "argmax" type_attr: "Targmax" } + output_arg { + name: "output" + type_attr: "T" + } attr { name: "ksize" type: "list(int)" @@ -15524,25 +17830,22 @@ op { minimum: 4 } attr { - name: "Targmax" - type: "type" - default_value { - type: DT_INT64 - } + name: "padding" + type: "string" allowed_values { list { - type: DT_INT32 - type: DT_INT64 + s: "SAME" + s: "VALID" } } } attr { - name: "padding" - type: "string" + name: "Targmax" + 
type: "type" allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_INT32 + type: DT_INT64 } } } @@ -15561,19 +17864,23 @@ op { } } op { - name: "MaxPoolWithArgmax" + name: "MaxPoolGradWithArgmax" input_arg { name: "input" type_attr: "T" } - output_arg { - name: "output" + input_arg { + name: "grad" type_attr: "T" } - output_arg { + input_arg { name: "argmax" type_attr: "Targmax" } + output_arg { + name: "output" + type_attr: "T" + } attr { name: "ksize" type: "list(int)" @@ -15587,25 +17894,22 @@ op { minimum: 4 } attr { - name: "Targmax" - type: "type" - default_value { - type: DT_INT64 - } + name: "padding" + type: "string" allowed_values { list { - type: DT_INT32 - type: DT_INT64 + s: "SAME" + s: "VALID" } } } attr { - name: "padding" - type: "string" + name: "Targmax" + type: "type" allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_INT32 + type: DT_INT64 } } } @@ -15627,6 +17931,401 @@ op { } } } +op { + name: "MaxPoolGradWithArgmax" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "argmax" + type_attr: "Targmax" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "Targmax" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} +op { + name: "MaxPoolV2" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "ksize" + type: DT_INT32 + } + input_arg { + name: "strides" 
+ type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } + } + } +} +op { + name: "MaxPoolV2" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "ksize" + type: DT_INT32 + } + input_arg { + name: "strides" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_QINT8 + } + } + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + s: "NCHW_VECT_C" + } + } + } +} +op { + name: "MaxPoolWithArgmax" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + output_arg { + name: "argmax" + type_attr: "Targmax" + } + attr { + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "Targmax" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + 
s: "VALID" + } + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_HALF + } + } + } +} +op { + name: "MaxPoolWithArgmax" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + output_arg { + name: "argmax" + type_attr: "Targmax" + } + attr { + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "Targmax" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } + } +} +op { + name: "MaxPoolWithArgmax" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + output_arg { + name: "argmax" + type_attr: "Targmax" + } + attr { + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "Targmax" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "Maximum" input_arg { @@ -15713,6 +18412,65 @@ op { } } } 
+op { + name: "Mean" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "reduction_indices" + type_attr: "Tidx" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "keep_dims" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "Merge" input_arg { @@ -15894,6 +18652,65 @@ op { } } } +op { + name: "Min" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "reduction_indices" + type_attr: "Tidx" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "keep_dims" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "Minimum" input_arg { @@ -16117,12 +18934,61 @@ op { is_stateful: true } op { - name: "MutableDenseHashTable" + name: "Multinomial" input_arg { - name: "empty_key" - type_attr: "key_dtype" + name: "logits" + type_attr: "T" } - output_arg { + input_arg { + name: "num_samples" + type: DT_INT32 + } + output_arg { + name: "output" + type: DT_INT64 
+ } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + is_stateful: true +} +op { + name: "MutableDenseHashTable" + input_arg { + name: "empty_key" + type_attr: "key_dtype" + } + output_arg { name: "table_handle" type: DT_STRING is_ref: true @@ -18216,6 +21082,65 @@ op { } } } +op { + name: "Prod" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "reduction_indices" + type_attr: "Tidx" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "keep_dims" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "PyFunc" input_arg { @@ -19494,250 +22419,1277 @@ op { } } attr { - name: "T2" - type: "type" - allowed_values { - list { - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 - } + name: "T2" + type: "type" + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } + } + attr { + name: "Toutput" + type: "type" + default_value { + type: DT_QINT32 + } + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: 
DT_QINT32 + } + } + } + is_commutative: true +} +op { + name: "QuantizedRelu" + input_arg { + name: "features" + type_attr: "Tinput" + } + input_arg { + name: "min_features" + type: DT_FLOAT + } + input_arg { + name: "max_features" + type: DT_FLOAT + } + output_arg { + name: "activations" + type_attr: "out_type" + } + output_arg { + name: "min_activations" + type: DT_FLOAT + } + output_arg { + name: "max_activations" + type: DT_FLOAT + } + attr { + name: "Tinput" + type: "type" + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } + } + attr { + name: "out_type" + type: "type" + default_value { + type: DT_QUINT8 + } + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } + } +} +op { + name: "QuantizedRelu6" + input_arg { + name: "features" + type_attr: "Tinput" + } + input_arg { + name: "min_features" + type: DT_FLOAT + } + input_arg { + name: "max_features" + type: DT_FLOAT + } + output_arg { + name: "activations" + type_attr: "out_type" + } + output_arg { + name: "min_activations" + type: DT_FLOAT + } + output_arg { + name: "max_activations" + type: DT_FLOAT + } + attr { + name: "Tinput" + type: "type" + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } + } + attr { + name: "out_type" + type: "type" + default_value { + type: DT_QUINT8 + } + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } + } +} +op { + name: "QuantizedReluX" + input_arg { + name: "features" + type_attr: "Tinput" + } + input_arg { + name: "max_value" + type: DT_FLOAT + } + input_arg { + name: "min_features" + type: DT_FLOAT + } + input_arg { + name: "max_features" + type: DT_FLOAT + } + output_arg { + name: "activations" + type_attr: "out_type" + } + output_arg { + name: "min_activations" + type: DT_FLOAT + } 
+ output_arg { + name: "max_activations" + type: DT_FLOAT + } + attr { + name: "Tinput" + type: "type" + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } + } + attr { + name: "out_type" + type: "type" + default_value { + type: DT_QUINT8 + } + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } + } +} +op { + name: "QuantizedReshape" + input_arg { + name: "tensor" + type_attr: "T" + } + input_arg { + name: "shape" + type_attr: "Tshape" + } + input_arg { + name: "input_min" + type: DT_FLOAT + } + input_arg { + name: "input_max" + type: DT_FLOAT + } + output_arg { + name: "output" + type_attr: "T" + } + output_arg { + name: "output_min" + type: DT_FLOAT + } + output_arg { + name: "output_max" + type: DT_FLOAT + } + attr { + name: "T" + type: "type" + } + attr { + name: "Tshape" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "QuantizedResizeBilinear" + input_arg { + name: "images" + type_attr: "T" + } + input_arg { + name: "size" + type: DT_INT32 + } + input_arg { + name: "min" + type: DT_FLOAT + } + input_arg { + name: "max" + type: DT_FLOAT + } + output_arg { + name: "resized_images" + type_attr: "T" + } + output_arg { + name: "out_min" + type: DT_FLOAT + } + output_arg { + name: "out_max" + type: DT_FLOAT + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_QUINT8 + type: DT_QINT32 + type: DT_FLOAT + } + } + } + attr { + name: "align_corners" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "QueueClose" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + attr { + name: "cancel_pending_enqueues" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "QueueCloseV2" + input_arg { + name: "handle" + type: DT_RESOURCE + } + attr { + name: 
"cancel_pending_enqueues" + type: "bool" + default_value { + b: false + } + } + is_stateful: true +} +op { + name: "QueueDequeue" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + output_arg { + name: "components" + type_list_attr: "component_types" + } + attr { + name: "component_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "timeout_ms" + type: "int" + default_value { + i: -1 + } + } +} +op { + name: "QueueDequeueMany" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + input_arg { + name: "n" + type: DT_INT32 + } + output_arg { + name: "components" + type_list_attr: "component_types" + } + attr { + name: "component_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "timeout_ms" + type: "int" + default_value { + i: -1 + } + } +} +op { + name: "QueueDequeueManyV2" + input_arg { + name: "handle" + type: DT_RESOURCE + } + input_arg { + name: "n" + type: DT_INT32 + } + output_arg { + name: "components" + type_list_attr: "component_types" + } + attr { + name: "component_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "timeout_ms" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "QueueDequeueUpTo" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + input_arg { + name: "n" + type: DT_INT32 + } + output_arg { + name: "components" + type_list_attr: "component_types" + } + attr { + name: "component_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "timeout_ms" + type: "int" + default_value { + i: -1 + } + } +} +op { + name: "QueueDequeueUpToV2" + input_arg { + name: "handle" + type: DT_RESOURCE + } + input_arg { + name: "n" + type: DT_INT32 + } + output_arg { + name: "components" + type_list_attr: "component_types" + } + attr { + name: "component_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "timeout_ms" + type: 
"int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "QueueDequeueV2" + input_arg { + name: "handle" + type: DT_RESOURCE + } + output_arg { + name: "components" + type_list_attr: "component_types" + } + attr { + name: "component_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "timeout_ms" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "QueueEnqueue" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + input_arg { + name: "components" + type_list_attr: "Tcomponents" + } + attr { + name: "Tcomponents" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "timeout_ms" + type: "int" + default_value { + i: -1 + } + } +} +op { + name: "QueueEnqueueMany" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + input_arg { + name: "components" + type_list_attr: "Tcomponents" + } + attr { + name: "Tcomponents" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "timeout_ms" + type: "int" + default_value { + i: -1 + } + } +} +op { + name: "QueueEnqueueManyV2" + input_arg { + name: "handle" + type: DT_RESOURCE + } + input_arg { + name: "components" + type_list_attr: "Tcomponents" + } + attr { + name: "Tcomponents" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "timeout_ms" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "QueueEnqueueV2" + input_arg { + name: "handle" + type: DT_RESOURCE + } + input_arg { + name: "components" + type_list_attr: "Tcomponents" + } + attr { + name: "Tcomponents" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "timeout_ms" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "QueueIsClosed" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + output_arg { + name: "is_closed" + type: DT_BOOL + } +} +op { + name: "QueueIsClosedV2" + input_arg { 
+ name: "handle" + type: DT_RESOURCE + } + output_arg { + name: "is_closed" + type: DT_BOOL + } + is_stateful: true +} +op { + name: "QueueSize" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + output_arg { + name: "size" + type: DT_INT32 + } +} +op { + name: "QueueSizeV2" + input_arg { + name: "handle" + type: DT_RESOURCE + } + output_arg { + name: "size" + type: DT_INT32 + } + is_stateful: true +} +op { + name: "RFFT" + input_arg { + name: "input" + type: DT_FLOAT + } + input_arg { + name: "fft_length" + type: DT_INT32 + } + output_arg { + name: "output" + type: DT_COMPLEX64 + } +} +op { + name: "RFFT2D" + input_arg { + name: "input" + type: DT_FLOAT + } + input_arg { + name: "fft_length" + type: DT_INT32 + } + output_arg { + name: "output" + type: DT_COMPLEX64 + } +} +op { + name: "RFFT3D" + input_arg { + name: "input" + type: DT_FLOAT + } + input_arg { + name: "fft_length" + type: DT_INT32 + } + output_arg { + name: "output" + type: DT_COMPLEX64 + } +} +op { + name: "RGBToHSV" + input_arg { + name: "images" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} +op { + name: "RandomCrop" + input_arg { + name: "image" + type_attr: "T" + } + input_arg { + name: "size" + type: DT_INT64 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_UINT8 + type: DT_INT8 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } + deprecation { + version: 8 + } + is_stateful: true +} +op { + name: "RandomGamma" + input_arg { + name: "shape" + type_attr: "S" + } + input_arg { + name: "alpha" + type_attr: "T" + } + output_arg { + 
name: "output" + type_attr: "T" + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "S" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + is_stateful: true +} +op { + name: "RandomPoisson" + input_arg { + name: "shape" + type_attr: "S" + } + input_arg { + name: "rate" + type_attr: "dtype" + } + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "S" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } + is_stateful: true +} +op { + name: "RandomPoissonV2" + input_arg { + name: "shape" + type_attr: "S" + } + input_arg { + name: "rate" + type_attr: "R" + } + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "S" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "R" + type: "type" + default_value { + type: DT_DOUBLE + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "dtype" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} +op { + name: "RandomShuffle" + input_arg { + name: "value" + type_attr: "T" + } + 
output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "T" + type: "type" + } + is_stateful: true +} +op { + name: "RandomShuffleQueue" + output_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + attr { + name: "component_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "shapes" + type: "list(shape)" + default_value { + list { + } + } + has_minimum: true + } + attr { + name: "capacity" + type: "int" + default_value { + i: -1 + } + } + attr { + name: "min_after_dequeue" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} +op { + name: "RandomShuffleQueueV2" + output_arg { + name: "handle" + type: DT_RESOURCE + } + attr { + name: "component_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "shapes" + type: "list(shape)" + default_value { + list { + } + } + has_minimum: true + } + attr { + name: "capacity" + type: "int" + default_value { + i: -1 + } + } + attr { + name: "min_after_dequeue" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 } } attr { - name: "Toutput" - type: "type" + name: "container" + type: "string" default_value { - type: DT_QINT32 + s: "" } - allowed_values { - list { - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 - } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" } } - is_commutative: true + 
is_stateful: true } op { - name: "QuantizedRelu" - input_arg { - name: "features" - type_attr: "Tinput" - } - input_arg { - name: "min_features" - type: DT_FLOAT - } + name: "RandomStandardNormal" input_arg { - name: "max_features" - type: DT_FLOAT + name: "shape" + type_attr: "T" } output_arg { - name: "activations" - type_attr: "out_type" + name: "output" + type_attr: "dtype" } - output_arg { - name: "min_activations" - type: DT_FLOAT + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } } - output_arg { - name: "max_activations" - type: DT_FLOAT + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } } attr { - name: "Tinput" + name: "dtype" type: "type" allowed_values { list { - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE } } } attr { - name: "out_type" + name: "T" type: "type" - default_value { - type: DT_QUINT8 - } allowed_values { list { - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 + type: DT_INT32 + type: DT_INT64 } } } + is_stateful: true } op { - name: "QuantizedRelu6" - input_arg { - name: "features" - type_attr: "Tinput" - } - input_arg { - name: "min_features" - type: DT_FLOAT - } + name: "RandomUniform" input_arg { - name: "max_features" - type: DT_FLOAT + name: "shape" + type_attr: "T" } output_arg { - name: "activations" - type_attr: "out_type" + name: "output" + type_attr: "dtype" } - output_arg { - name: "min_activations" - type: DT_FLOAT + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } } - output_arg { - name: "max_activations" - type: DT_FLOAT + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } } attr { - name: "Tinput" + name: "dtype" type: "type" allowed_values { list { - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE } } } attr { - name: "out_type" + name: 
"T" type: "type" - default_value { - type: DT_QUINT8 - } allowed_values { list { - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 + type: DT_INT32 + type: DT_INT64 } } } + is_stateful: true } op { - name: "QuantizedReluX" - input_arg { - name: "features" - type_attr: "Tinput" - } + name: "RandomUniformInt" input_arg { - name: "max_value" - type: DT_FLOAT + name: "shape" + type_attr: "T" } input_arg { - name: "min_features" - type: DT_FLOAT + name: "minval" + type_attr: "Tout" } input_arg { - name: "max_features" - type: DT_FLOAT + name: "maxval" + type_attr: "Tout" } output_arg { - name: "activations" - type_attr: "out_type" + name: "output" + type_attr: "Tout" } - output_arg { - name: "min_activations" - type: DT_FLOAT + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } } - output_arg { - name: "max_activations" - type: DT_FLOAT + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } } attr { - name: "Tinput" + name: "Tout" type: "type" allowed_values { list { - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 + type: DT_INT32 + type: DT_INT64 } } } attr { - name: "out_type" + name: "T" type: "type" - default_value { - type: DT_QUINT8 - } allowed_values { list { - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 + type: DT_INT32 + type: DT_INT64 } } } + is_stateful: true } op { - name: "QuantizedReshape" - input_arg { - name: "tensor" - type_attr: "T" - } + name: "Range" input_arg { - name: "shape" - type_attr: "Tshape" + name: "start" + type_attr: "Tidx" } input_arg { - name: "input_min" - type: DT_FLOAT + name: "limit" + type_attr: "Tidx" } input_arg { - name: "input_max" - type: DT_FLOAT + name: "delta" + type_attr: "Tidx" } output_arg { name: "output" - type_attr: "T" - } - output_arg { - name: "output_min" - type: DT_FLOAT - } - output_arg { - name: "output_max" - type: DT_FLOAT - } - attr { - name: "T" - type: 
"type" + type_attr: "Tidx" } attr { - name: "Tshape" + name: "Tidx" type: "type" default_value { type: DT_INT32 } allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE type: DT_INT32 type: DT_INT64 } @@ -19745,456 +23697,307 @@ op { } } op { - name: "QuantizedResizeBilinear" - input_arg { - name: "images" - type_attr: "T" - } + name: "RangeDataset" input_arg { - name: "size" - type: DT_INT32 + name: "start" + type: DT_INT64 } input_arg { - name: "min" - type: DT_FLOAT + name: "stop" + type: DT_INT64 } input_arg { - name: "max" - type: DT_FLOAT - } - output_arg { - name: "resized_images" - type_attr: "T" - } - output_arg { - name: "out_min" - type: DT_FLOAT + name: "step" + type: DT_INT64 } output_arg { - name: "out_max" - type: DT_FLOAT - } - attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_QUINT8 - type: DT_QINT32 - type: DT_FLOAT - } - } - } - attr { - name: "align_corners" - type: "bool" - default_value { - b: false - } - } -} -op { - name: "QueueClose" - input_arg { name: "handle" - type: DT_STRING - is_ref: true + type: DT_VARIANT } attr { - name: "cancel_pending_enqueues" - type: "bool" - default_value { - b: false - } - } -} -op { - name: "QueueCloseV2" - input_arg { - name: "handle" - type: DT_RESOURCE + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 } attr { - name: "cancel_pending_enqueues" - type: "bool" - default_value { - b: false - } + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 } is_stateful: true } op { - name: "QueueDequeue" + name: "Rank" input_arg { - name: "handle" - type: DT_STRING - is_ref: true + name: "input" + type_attr: "T" } output_arg { - name: "components" - type_list_attr: "component_types" - } - attr { - name: "component_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "output" + type: DT_INT32 } attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + name: "T" + type: "type" } } op { - name: 
"QueueDequeueMany" + name: "ReadFile" input_arg { - name: "handle" + name: "filename" type: DT_STRING - is_ref: true - } - input_arg { - name: "n" - type: DT_INT32 } output_arg { - name: "components" - type_list_attr: "component_types" - } - attr { - name: "component_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + name: "contents" + type: DT_STRING } } op { - name: "QueueDequeueManyV2" + name: "ReadVariableOp" input_arg { - name: "handle" + name: "resource" type: DT_RESOURCE } - input_arg { - name: "n" - type: DT_INT32 - } output_arg { - name: "components" - type_list_attr: "component_types" - } - attr { - name: "component_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "value" + type_attr: "dtype" } attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + name: "dtype" + type: "type" } is_stateful: true } op { - name: "QueueDequeueUpTo" + name: "ReaderNumRecordsProduced" input_arg { - name: "handle" + name: "reader_handle" type: DT_STRING is_ref: true } - input_arg { - name: "n" - type: DT_INT32 - } output_arg { - name: "components" - type_list_attr: "component_types" - } - attr { - name: "component_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + name: "records_produced" + type: DT_INT64 } } op { - name: "QueueDequeueUpToV2" + name: "ReaderNumRecordsProducedV2" input_arg { - name: "handle" + name: "reader_handle" type: DT_RESOURCE } - input_arg { - name: "n" - type: DT_INT32 - } output_arg { - name: "components" - type_list_attr: "component_types" - } - attr { - name: "component_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + name: "records_produced" + type: DT_INT64 } is_stateful: true } op { - name: "QueueDequeueV2" + name: 
"ReaderNumWorkUnitsCompleted" input_arg { - name: "handle" - type: DT_RESOURCE + name: "reader_handle" + type: DT_STRING + is_ref: true } output_arg { - name: "components" - type_list_attr: "component_types" + name: "units_completed" + type: DT_INT64 } - attr { - name: "component_types" - type: "list(type)" - has_minimum: true - minimum: 1 +} +op { + name: "ReaderNumWorkUnitsCompletedV2" + input_arg { + name: "reader_handle" + type: DT_RESOURCE } - attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + output_arg { + name: "units_completed" + type: DT_INT64 } is_stateful: true } op { - name: "QueueEnqueue" + name: "ReaderRead" input_arg { - name: "handle" + name: "reader_handle" type: DT_STRING is_ref: true } input_arg { - name: "components" - type_list_attr: "Tcomponents" - } - attr { - name: "Tcomponents" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "queue_handle" + type: DT_STRING + is_ref: true } - attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + output_arg { + name: "key" + type: DT_STRING + } + output_arg { + name: "value" + type: DT_STRING } } op { - name: "QueueEnqueueMany" + name: "ReaderReadUpTo" input_arg { - name: "handle" + name: "reader_handle" type: DT_STRING is_ref: true } input_arg { - name: "components" - type_list_attr: "Tcomponents" + name: "queue_handle" + type: DT_STRING + is_ref: true } - attr { - name: "Tcomponents" - type: "list(type)" - has_minimum: true - minimum: 1 + input_arg { + name: "num_records" + type: DT_INT64 } - attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + output_arg { + name: "keys" + type: DT_STRING + } + output_arg { + name: "values" + type: DT_STRING } } op { - name: "QueueEnqueueManyV2" + name: "ReaderReadUpToV2" input_arg { - name: "handle" + name: "reader_handle" type: DT_RESOURCE } input_arg { - name: "components" - type_list_attr: "Tcomponents" + name: "queue_handle" + type: DT_RESOURCE } - attr { - name: "Tcomponents" - type: 
"list(type)" - has_minimum: true - minimum: 1 + input_arg { + name: "num_records" + type: DT_INT64 } - attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + output_arg { + name: "keys" + type: DT_STRING + } + output_arg { + name: "values" + type: DT_STRING } is_stateful: true } op { - name: "QueueEnqueueV2" + name: "ReaderReadV2" input_arg { - name: "handle" + name: "reader_handle" type: DT_RESOURCE } input_arg { - name: "components" - type_list_attr: "Tcomponents" + name: "queue_handle" + type: DT_RESOURCE } - attr { - name: "Tcomponents" - type: "list(type)" - has_minimum: true - minimum: 1 + output_arg { + name: "key" + type: DT_STRING } - attr { - name: "timeout_ms" - type: "int" - default_value { - i: -1 - } + output_arg { + name: "value" + type: DT_STRING } is_stateful: true } op { - name: "QueueIsClosed" + name: "ReaderReset" input_arg { - name: "handle" + name: "reader_handle" type: DT_STRING is_ref: true } - output_arg { - name: "is_closed" - type: DT_BOOL - } } op { - name: "QueueIsClosedV2" + name: "ReaderResetV2" input_arg { - name: "handle" + name: "reader_handle" type: DT_RESOURCE } - output_arg { - name: "is_closed" - type: DT_BOOL - } is_stateful: true } op { - name: "QueueSize" + name: "ReaderRestoreState" input_arg { - name: "handle" + name: "reader_handle" type: DT_STRING is_ref: true } - output_arg { - name: "size" - type: DT_INT32 + input_arg { + name: "state" + type: DT_STRING } } op { - name: "QueueSizeV2" + name: "ReaderRestoreStateV2" input_arg { - name: "handle" + name: "reader_handle" type: DT_RESOURCE } - output_arg { - name: "size" - type: DT_INT32 + input_arg { + name: "state" + type: DT_STRING } is_stateful: true } op { - name: "RFFT" - input_arg { - name: "input" - type: DT_FLOAT - } + name: "ReaderSerializeState" input_arg { - name: "fft_length" - type: DT_INT32 + name: "reader_handle" + type: DT_STRING + is_ref: true } output_arg { - name: "output" - type: DT_COMPLEX64 + name: "state" + type: DT_STRING } } op { 
- name: "RFFT2D" - input_arg { - name: "input" - type: DT_FLOAT - } + name: "ReaderSerializeStateV2" input_arg { - name: "fft_length" - type: DT_INT32 + name: "reader_handle" + type: DT_RESOURCE } output_arg { - name: "output" - type: DT_COMPLEX64 + name: "state" + type: DT_STRING } + is_stateful: true } op { - name: "RFFT3D" + name: "Real" input_arg { name: "input" - type: DT_FLOAT - } - input_arg { - name: "fft_length" - type: DT_INT32 - } - output_arg { - name: "output" - type: DT_COMPLEX64 - } -} -op { - name: "RGBToHSV" - input_arg { - name: "images" type_attr: "T" } output_arg { name: "output" - type_attr: "T" + type_attr: "Tout" } attr { name: "T" type: "type" + default_value { + type: DT_COMPLEX64 + } + allowed_values { + list { + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } + attr { + name: "Tout" + type: "type" default_value { type: DT_FLOAT } @@ -20207,17 +24010,17 @@ op { } } op { - name: "RandomCrop" + name: "RealDiv" input_arg { - name: "image" + name: "x" type_attr: "T" } input_arg { - name: "size" - type: DT_INT64 + name: "y" + type_attr: "T" } output_arg { - name: "output" + name: "z" type_attr: "T" } attr { @@ -20225,73 +24028,31 @@ op { type: "type" allowed_values { list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE type: DT_UINT8 type: DT_INT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT32 type: DT_INT64 - type: DT_FLOAT - type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } - attr { - name: "seed" - type: "int" - default_value { - i: 0 - } - } - attr { - name: "seed2" - type: "int" - default_value { - i: 0 - } - } - deprecation { - version: 8 - } - is_stateful: true } op { - name: "RandomGamma" - input_arg { - name: "shape" - type_attr: "S" - } + name: "Reciprocal" input_arg { - name: "alpha" + name: "x" type_attr: "T" } output_arg { - name: "output" + name: "y" type_attr: "T" } - attr { - name: "seed" - type: "int" - default_value { - i: 0 - } - } - attr { - name: "seed2" - type: "int" - default_value { - i: 0 - } - 
} - attr { - name: "S" - type: "type" - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } - } attr { name: "T" type: "type" @@ -20300,832 +24061,892 @@ op { type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } - is_stateful: true } op { - name: "RandomPoisson" + name: "ReciprocalGrad" input_arg { - name: "shape" - type_attr: "S" + name: "x" + type_attr: "T" } input_arg { - name: "rate" - type_attr: "dtype" + name: "y" + type_attr: "T" } output_arg { - name: "output" - type_attr: "dtype" - } - attr { - name: "seed" - type: "int" - default_value { - i: 0 - } - } - attr { - name: "seed2" - type: "int" - default_value { - i: 0 - } - } - attr { - name: "S" - type: "type" - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } + name: "z" + type_attr: "T" } attr { - name: "dtype" + name: "T" type: "type" allowed_values { list { type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } - is_stateful: true } op { - name: "RandomPoissonV2" + name: "ReciprocalGrad" input_arg { - name: "shape" - type_attr: "S" + name: "y" + type_attr: "T" } input_arg { - name: "rate" - type_attr: "R" + name: "dy" + type_attr: "T" } output_arg { - name: "output" - type_attr: "dtype" - } - attr { - name: "seed" - type: "int" - default_value { - i: 0 - } - } - attr { - name: "seed2" - type: "int" - default_value { - i: 0 - } - } - attr { - name: "S" - type: "type" - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } - } - attr { - name: "R" - type: "type" - default_value { - type: DT_DOUBLE - } - allowed_values { - list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT32 - type: DT_INT64 - } - } + name: "z" + type_attr: "T" } attr { - name: "dtype" + name: "T" type: "type" - default_value { - type: DT_INT64 - } allowed_values { list { type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 - type: DT_INT64 + type: 
DT_COMPLEX64 + type: DT_COMPLEX128 } } } - is_stateful: true -} -op { - name: "RandomShuffle" - input_arg { - name: "value" - type_attr: "T" - } - output_arg { - name: "output" - type_attr: "T" - } - attr { - name: "seed" - type: "int" - default_value { - i: 0 - } - } - attr { - name: "seed2" - type: "int" - default_value { - i: 0 - } - } - attr { - name: "T" - type: "type" - } - is_stateful: true } op { - name: "RandomShuffleQueue" + name: "RecordInput" output_arg { - name: "handle" + name: "records" type: DT_STRING - is_ref: true - } - attr { - name: "component_types" - type: "list(type)" - has_minimum: true - minimum: 1 } attr { - name: "shapes" - type: "list(shape)" - default_value { - list { - } - } - has_minimum: true + name: "file_pattern" + type: "string" } attr { - name: "capacity" + name: "file_random_seed" type: "int" default_value { - i: -1 + i: 301 } } attr { - name: "min_after_dequeue" - type: "int" + name: "file_shuffle_shift_ratio" + type: "float" default_value { - i: 0 + f: 0 } } attr { - name: "seed" + name: "file_buffer_size" type: "int" default_value { - i: 0 + i: 10000 } } attr { - name: "seed2" + name: "file_parallelism" type: "int" default_value { - i: 0 - } - } - attr { - name: "container" - type: "string" - default_value { - s: "" + i: 16 } } attr { - name: "shared_name" - type: "string" + name: "batch_size" + type: "int" default_value { - s: "" + i: 32 } } is_stateful: true } op { - name: "RandomShuffleQueueV2" - output_arg { - name: "handle" - type: DT_RESOURCE - } - attr { - name: "component_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "shapes" - type: "list(shape)" - default_value { - list { - } - } - has_minimum: true - } - attr { - name: "capacity" - type: "int" - default_value { - i: -1 - } - } - attr { - name: "min_after_dequeue" - type: "int" - default_value { - i: 0 - } + name: "ReduceJoin" + input_arg { + name: "inputs" + type: DT_STRING } - attr { - name: "seed" - type: "int" - default_value 
{ - i: 0 - } + input_arg { + name: "reduction_indices" + type: DT_INT32 } - attr { - name: "seed2" - type: "int" - default_value { - i: 0 - } + output_arg { + name: "output" + type: DT_STRING } attr { - name: "container" - type: "string" + name: "keep_dims" + type: "bool" default_value { - s: "" + b: false } } attr { - name: "shared_name" + name: "separator" type: "string" default_value { s: "" } } - is_stateful: true } op { - name: "RandomStandardNormal" + name: "RefEnter" input_arg { - name: "shape" + name: "data" type_attr: "T" + is_ref: true } output_arg { name: "output" - type_attr: "dtype" + type_attr: "T" + is_ref: true } attr { - name: "seed" - type: "int" + name: "T" + type: "type" + } + attr { + name: "frame_name" + type: "string" + } + attr { + name: "is_constant" + type: "bool" default_value { - i: 0 + b: false } } attr { - name: "seed2" + name: "parallel_iterations" type: "int" default_value { - i: 0 + i: 10 } } +} +op { + name: "RefExit" + input_arg { + name: "data" + type_attr: "T" + is_ref: true + } + output_arg { + name: "output" + type_attr: "T" + is_ref: true + } attr { - name: "dtype" + name: "T" type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - } - } + } +} +op { + name: "RefIdentity" + input_arg { + name: "input" + type_attr: "T" + is_ref: true + } + output_arg { + name: "output" + type_attr: "T" + is_ref: true } attr { name: "T" type: "type" - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } } - is_stateful: true + allows_uninitialized_input: true } op { - name: "RandomUniform" + name: "RefMerge" input_arg { - name: "shape" + name: "inputs" type_attr: "T" + number_attr: "N" + is_ref: true } output_arg { name: "output" - type_attr: "dtype" + type_attr: "T" + is_ref: true + } + output_arg { + name: "value_index" + type: DT_INT32 } attr { - name: "seed" - type: "int" - default_value { - i: 0 - } + name: "T" + type: "type" } attr { - name: "seed2" + name: "N" type: "int" - 
default_value { - i: 0 - } + has_minimum: true + minimum: 1 + } +} +op { + name: "RefNextIteration" + input_arg { + name: "data" + type_attr: "T" + is_ref: true + } + output_arg { + name: "output" + type_attr: "T" + is_ref: true } attr { - name: "dtype" + name: "T" type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - } - } + } +} +op { + name: "RefSelect" + input_arg { + name: "index" + type: DT_INT32 + } + input_arg { + name: "inputs" + type_attr: "T" + number_attr: "N" + is_ref: true + } + output_arg { + name: "output" + type_attr: "T" + is_ref: true } attr { name: "T" type: "type" - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } } - is_stateful: true + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } } op { - name: "RandomUniformInt" + name: "RefSwitch" input_arg { - name: "shape" + name: "data" type_attr: "T" + is_ref: true } input_arg { - name: "minval" - type_attr: "Tout" + name: "pred" + type: DT_BOOL } - input_arg { - name: "maxval" - type_attr: "Tout" + output_arg { + name: "output_false" + type_attr: "T" + is_ref: true } output_arg { - name: "output" - type_attr: "Tout" + name: "output_true" + type_attr: "T" + is_ref: true } attr { - name: "seed" - type: "int" - default_value { - i: 0 - } + name: "T" + type: "type" } - attr { - name: "seed2" - type: "int" - default_value { - i: 0 - } + allows_uninitialized_input: true +} +op { + name: "Relu" + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "activations" + type_attr: "T" } attr { - name: "Tout" + name: "T" type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE type: DT_INT32 type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF } } } +} +op { + name: "Relu" + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "activations" + type_attr: "T" + } attr { name: "T" type: "type" allowed_values { list { + type: DT_FLOAT 
+ type: DT_DOUBLE type: DT_INT32 type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } - is_stateful: true } op { - name: "Range" - input_arg { - name: "start" - type_attr: "Tidx" - } - input_arg { - name: "limit" - type_attr: "Tidx" - } + name: "Relu6" input_arg { - name: "delta" - type_attr: "Tidx" + name: "features" + type_attr: "T" } output_arg { - name: "output" - type_attr: "Tidx" + name: "activations" + type_attr: "T" } attr { - name: "Tidx" + name: "T" type: "type" - default_value { - type: DT_INT32 - } allowed_values { list { type: DT_FLOAT type: DT_DOUBLE type: DT_INT32 type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF } } } } op { - name: "RangeDataset" - input_arg { - name: "start" - type: DT_INT64 - } - input_arg { - name: "stop" - type: DT_INT64 - } + name: "Relu6" input_arg { - name: "step" - type: DT_INT64 + name: "features" + type_attr: "T" } output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "activations" + type_attr: "T" } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } } - is_stateful: true } op { - name: "Rank" + name: "Relu6Grad" input_arg { - name: "input" + name: "gradients" + type_attr: "T" + } + input_arg { + name: "features" type_attr: "T" } output_arg { - name: "output" - type: DT_INT32 + name: "backprops" + type_attr: "T" } attr { name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: 
DT_HALF + } + } } } op { - name: "ReadFile" + name: "Relu6Grad" input_arg { - name: "filename" - type: DT_STRING - } - output_arg { - name: "contents" - type: DT_STRING + name: "gradients" + type_attr: "T" } -} -op { - name: "ReadVariableOp" input_arg { - name: "resource" - type: DT_RESOURCE + name: "features" + type_attr: "T" } output_arg { - name: "value" - type_attr: "dtype" + name: "backprops" + type_attr: "T" } attr { - name: "dtype" + name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } } - is_stateful: true } op { - name: "ReaderNumRecordsProduced" + name: "ReluGrad" input_arg { - name: "reader_handle" - type: DT_STRING - is_ref: true - } - output_arg { - name: "records_produced" - type: DT_INT64 + name: "gradients" + type_attr: "T" } -} -op { - name: "ReaderNumRecordsProducedV2" input_arg { - name: "reader_handle" - type: DT_RESOURCE + name: "features" + type_attr: "T" } output_arg { - name: "records_produced" - type: DT_INT64 + name: "backprops" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } } - is_stateful: true } op { - name: "ReaderNumWorkUnitsCompleted" + name: "ReluGrad" input_arg { - name: "reader_handle" - type: DT_STRING - is_ref: true - } - output_arg { - name: "units_completed" - type: DT_INT64 + name: "gradients" + type_attr: "T" } -} -op { - name: "ReaderNumWorkUnitsCompletedV2" input_arg { - name: "reader_handle" - type: DT_RESOURCE + name: "features" + type_attr: "T" } output_arg { - name: "units_completed" - type: DT_INT64 + name: "backprops" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: 
DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } } - is_stateful: true } op { - name: "ReaderRead" + name: "RemoteCall" input_arg { - name: "reader_handle" + name: "target" type: DT_STRING - is_ref: true } input_arg { - name: "queue_handle" - type: DT_STRING - is_ref: true + name: "args" + type_list_attr: "Tin" } output_arg { - name: "key" - type: DT_STRING + name: "output" + type_list_attr: "Tout" } - output_arg { - name: "value" - type: DT_STRING + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + minimum: 1 } -} -op { - name: "ReaderReadUpTo" - input_arg { - name: "reader_handle" - type: DT_STRING - is_ref: true + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + minimum: 1 } - input_arg { - name: "queue_handle" - type: DT_STRING - is_ref: true + attr { + name: "f" + type: "func" } +} +op { + name: "RemoteFusedGraphExecute" input_arg { - name: "num_records" - type: DT_INT64 + name: "inputs" + type_list_attr: "Tinputs" } output_arg { - name: "keys" - type: DT_STRING + name: "outputs" + type_list_attr: "Toutputs" } - output_arg { - name: "values" - type: DT_STRING + attr { + name: "Tinputs" + type: "list(type)" + has_minimum: true } -} -op { - name: "ReaderReadUpToV2" - input_arg { - name: "reader_handle" - type: DT_RESOURCE + attr { + name: "Toutputs" + type: "list(type)" + has_minimum: true + } + attr { + name: "serialized_remote_fused_graph_execute_info" + type: "string" } +} +op { + name: "RepeatDataset" input_arg { - name: "queue_handle" - type: DT_RESOURCE + name: "input_dataset" + type: DT_VARIANT } input_arg { - name: "num_records" + name: "count" type: DT_INT64 } output_arg { - name: "keys" - type: DT_STRING + name: "handle" + type: DT_VARIANT } - output_arg { - name: "values" - type: DT_STRING + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: 
"output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 } is_stateful: true } op { - name: "ReaderReadV2" + name: "RepeatDataset" input_arg { - name: "reader_handle" - type: DT_RESOURCE + name: "input_dataset" + type: DT_VARIANT } input_arg { - name: "queue_handle" - type: DT_RESOURCE + name: "count" + type: DT_INT64 } output_arg { - name: "key" - type: DT_STRING + name: "handle" + type: DT_VARIANT } - output_arg { - name: "value" - type: DT_STRING + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 } - is_stateful: true } op { - name: "ReaderReset" + name: "RequantizationRange" input_arg { - name: "reader_handle" - type: DT_STRING - is_ref: true + name: "input" + type_attr: "Tinput" } -} -op { - name: "ReaderResetV2" input_arg { - name: "reader_handle" - type: DT_RESOURCE + name: "input_min" + type: DT_FLOAT + } + input_arg { + name: "input_max" + type: DT_FLOAT + } + output_arg { + name: "output_min" + type: DT_FLOAT + } + output_arg { + name: "output_max" + type: DT_FLOAT + } + attr { + name: "Tinput" + type: "type" + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } } - is_stateful: true } op { - name: "ReaderRestoreState" + name: "Requantize" input_arg { - name: "reader_handle" - type: DT_STRING - is_ref: true + name: "input" + type_attr: "Tinput" } input_arg { - name: "state" - type: DT_STRING + name: "input_min" + type: DT_FLOAT } -} -op { - name: "ReaderRestoreStateV2" input_arg { - name: "reader_handle" - type: DT_RESOURCE + name: "input_max" + type: DT_FLOAT } input_arg { - name: "state" - type: DT_STRING + name: "requested_output_min" + type: DT_FLOAT } - is_stateful: true -} -op { - name: "ReaderSerializeState" input_arg { - name: "reader_handle" - type: DT_STRING - is_ref: true + name: "requested_output_max" + type: DT_FLOAT } output_arg { - 
name: "state" - type: DT_STRING + name: "output" + type_attr: "out_type" } -} -op { - name: "ReaderSerializeStateV2" - input_arg { - name: "reader_handle" - type: DT_RESOURCE + output_arg { + name: "output_min" + type: DT_FLOAT } output_arg { - name: "state" - type: DT_STRING + name: "output_max" + type: DT_FLOAT + } + attr { + name: "Tinput" + type: "type" + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } + } + attr { + name: "out_type" + type: "type" + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } } - is_stateful: true } op { - name: "Real" + name: "Reshape" input_arg { - name: "input" + name: "tensor" type_attr: "T" } + input_arg { + name: "shape" + type_attr: "Tshape" + } output_arg { name: "output" - type_attr: "Tout" + type_attr: "T" } attr { name: "T" type: "type" + } + attr { + name: "Tshape" + type: "type" default_value { - type: DT_COMPLEX64 + type: DT_INT32 } allowed_values { list { - type: DT_COMPLEX64 - type: DT_COMPLEX128 + type: DT_INT32 + type: DT_INT64 } } } +} +op { + name: "ResizeArea" + input_arg { + name: "images" + type_attr: "T" + } + input_arg { + name: "size" + type: DT_INT32 + } + output_arg { + name: "resized_images" + type: DT_FLOAT + } attr { - name: "Tout" + name: "T" type: "type" - default_value { - type: DT_FLOAT - } allowed_values { list { + type: DT_UINT8 + type: DT_INT8 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE } } } + attr { + name: "align_corners" + type: "bool" + default_value { + b: false + } + } } op { - name: "RealDiv" + name: "ResizeBicubic" input_arg { - name: "x" + name: "images" type_attr: "T" } input_arg { - name: "y" - type_attr: "T" + name: "size" + type: DT_INT32 } output_arg { - name: "z" - type_attr: "T" + name: "resized_images" + type: DT_FLOAT } attr { name: "T" type: "type" allowed_values { list { - type: 
DT_HALF - type: DT_FLOAT - type: DT_DOUBLE type: DT_UINT8 type: DT_INT8 - type: DT_UINT16 type: DT_INT16 type: DT_INT32 type: DT_INT64 - type: DT_COMPLEX64 - type: DT_COMPLEX128 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE } } } + attr { + name: "align_corners" + type: "bool" + default_value { + b: false + } + } } op { - name: "Reciprocal" + name: "ResizeBicubicGrad" input_arg { - name: "x" + name: "grads" + type: DT_FLOAT + } + input_arg { + name: "original_image" type_attr: "T" } output_arg { - name: "y" + name: "output" type_attr: "T" } attr { @@ -21133,57 +24954,69 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 - type: DT_INT64 - type: DT_COMPLEX64 - type: DT_COMPLEX128 } } } + attr { + name: "align_corners" + type: "bool" + default_value { + b: false + } + } } op { - name: "ReciprocalGrad" + name: "ResizeBilinear" input_arg { - name: "x" + name: "images" type_attr: "T" } input_arg { - name: "y" - type_attr: "T" + name: "size" + type: DT_INT32 } output_arg { - name: "z" - type_attr: "T" + name: "resized_images" + type: DT_FLOAT } attr { name: "T" type: "type" allowed_values { list { + type: DT_UINT8 + type: DT_INT8 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 } } } + attr { + name: "align_corners" + type: "bool" + default_value { + b: false + } + } } op { - name: "ReciprocalGrad" + name: "ResizeBilinearGrad" input_arg { - name: "y" - type_attr: "T" + name: "grads" + type: DT_FLOAT } input_arg { - name: "dy" + name: "original_image" type_attr: "T" } output_arg { - name: "z" + name: "output" type_attr: "T" } attr { @@ -21191,271 +25024,417 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT + type: DT_HALF type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 } } } + attr { + name: "align_corners" + type: "bool" + default_value { + b: false + } + } } op { - name: 
"RecordInput" + name: "ResizeNearestNeighbor" + input_arg { + name: "images" + type_attr: "T" + } + input_arg { + name: "size" + type: DT_INT32 + } output_arg { - name: "records" - type: DT_STRING + name: "resized_images" + type_attr: "T" } attr { - name: "file_pattern" - type: "string" + name: "T" + type: "type" + allowed_values { + list { + type: DT_UINT8 + type: DT_INT8 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } } attr { - name: "file_random_seed" - type: "int" + name: "align_corners" + type: "bool" default_value { - i: 301 + b: false } } +} +op { + name: "ResizeNearestNeighborGrad" + input_arg { + name: "grads" + type_attr: "T" + } + input_arg { + name: "size" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } attr { - name: "file_shuffle_shift_ratio" - type: "float" - default_value { - f: 0 + name: "T" + type: "type" + allowed_values { + list { + type: DT_UINT8 + type: DT_INT8 + type: DT_INT32 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } } } attr { - name: "file_buffer_size" - type: "int" + name: "align_corners" + type: "bool" default_value { - i: 10000 + b: false } } +} +op { + name: "ResourceApplyAdadelta" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "accum_update" + type: DT_RESOURCE + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "rho" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } attr { - name: "file_parallelism" - type: "int" - default_value { - i: 16 + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } } } attr { - name: 
"batch_size" - type: "int" + name: "use_locking" + type: "bool" default_value { - i: 32 + b: false } } is_stateful: true } op { - name: "ReduceJoin" + name: "ResourceApplyAdadelta" input_arg { - name: "inputs" - type: DT_STRING + name: "var" + type: DT_RESOURCE } input_arg { - name: "reduction_indices" - type: DT_INT32 + name: "accum" + type: DT_RESOURCE } - output_arg { - name: "output" - type: DT_STRING + input_arg { + name: "accum_update" + type: DT_RESOURCE + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "rho" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" } attr { - name: "keep_dims" - type: "bool" - default_value { - b: false + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } } } attr { - name: "separator" - type: "string" + name: "use_locking" + type: "bool" default_value { - s: "" + b: false } } + is_stateful: true } op { - name: "RefEnter" + name: "ResourceApplyAdagrad" input_arg { - name: "data" + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "lr" type_attr: "T" - is_ref: true } - output_arg { - name: "output" + input_arg { + name: "grad" type_attr: "T" - is_ref: true } attr { name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } } attr { - name: "frame_name" - type: "string" - } - attr { - name: "is_constant" + name: "use_locking" type: "bool" 
default_value { b: false } } - attr { - name: "parallel_iterations" - type: "int" - default_value { - i: 10 - } - } + is_stateful: true } op { - name: "RefExit" + name: "ResourceApplyAdagrad" input_arg { - name: "data" - type_attr: "T" - is_ref: true - } - output_arg { - name: "output" - type_attr: "T" - is_ref: true + name: "var" + type: DT_RESOURCE } - attr { - name: "T" - type: "type" + input_arg { + name: "accum" + type: DT_RESOURCE } -} -op { - name: "RefIdentity" input_arg { - name: "input" + name: "lr" type_attr: "T" - is_ref: true } - output_arg { - name: "output" + input_arg { + name: "grad" type_attr: "T" - is_ref: true } attr { name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } } - allows_uninitialized_input: true + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + is_stateful: true } op { - name: "RefMerge" + name: "ResourceApplyAdagradDA" input_arg { - name: "inputs" - type_attr: "T" - number_attr: "N" - is_ref: true - } - output_arg { - name: "output" - type_attr: "T" - is_ref: true - } - output_arg { - name: "value_index" - type: DT_INT32 + name: "var" + type: DT_RESOURCE } - attr { - name: "T" - type: "type" + input_arg { + name: "gradient_accumulator" + type: DT_RESOURCE } - attr { - name: "N" - type: "int" - has_minimum: true - minimum: 1 + input_arg { + name: "gradient_squared_accumulator" + type: DT_RESOURCE } -} -op { - name: "RefNextIteration" input_arg { - name: "data" + name: "grad" type_attr: "T" - is_ref: true } - output_arg { - name: "output" + input_arg { + name: "lr" type_attr: "T" - is_ref: true - } - attr { - name: "T" - type: "type" } -} -op { - name: "RefSelect" input_arg { - name: "index" - type: DT_INT32 + 
name: "l1" + type_attr: "T" } input_arg { - name: "inputs" + name: "l2" type_attr: "T" - number_attr: "N" - is_ref: true } - output_arg { - name: "output" - type_attr: "T" - is_ref: true + input_arg { + name: "global_step" + type: DT_INT64 } attr { name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } } attr { - name: "N" - type: "int" - has_minimum: true - minimum: 1 + name: "use_locking" + type: "bool" + default_value { + b: false + } } + is_stateful: true } op { - name: "RefSwitch" + name: "ResourceApplyAdagradDA" input_arg { - name: "data" - type_attr: "T" - is_ref: true + name: "var" + type: DT_RESOURCE } input_arg { - name: "pred" - type: DT_BOOL + name: "gradient_accumulator" + type: DT_RESOURCE } - output_arg { - name: "output_false" - type_attr: "T" - is_ref: true + input_arg { + name: "gradient_squared_accumulator" + type: DT_RESOURCE } - output_arg { - name: "output_true" + input_arg { + name: "grad" type_attr: "T" - is_ref: true } - attr { - name: "T" - type: "type" + input_arg { + name: "lr" + type_attr: "T" } - allows_uninitialized_input: true -} -op { - name: "Relu" input_arg { - name: "features" + name: "l1" type_attr: "T" } - output_arg { - name: "activations" + input_arg { + name: "l2" type_attr: "T" } + input_arg { + name: "global_step" + type: DT_INT64 + } attr { name: "T" type: "type" @@ -21463,25 +25442,72 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: 
false + } + } + is_stateful: true } op { - name: "Relu6" + name: "ResourceApplyAdam" input_arg { - name: "features" + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "m" + type: DT_RESOURCE + } + input_arg { + name: "v" + type: DT_RESOURCE + } + input_arg { + name: "beta1_power" type_attr: "T" } - output_arg { - name: "activations" + input_arg { + name: "beta2_power" + type_attr: "T" + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "beta1" + type_attr: "T" + } + input_arg { + name: "beta2" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } attr { @@ -21491,29 +25517,70 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + is_stateful: true } op { - name: "Relu6Grad" + name: "ResourceApplyAdam" input_arg { - name: "gradients" + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "m" + type: DT_RESOURCE + } + input_arg { + name: "v" + type: DT_RESOURCE + } + input_arg { + name: "beta1_power" type_attr: "T" } input_arg { - name: "features" + name: "beta2_power" type_attr: "T" } - output_arg { - name: "backprops" + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "beta1" + type_attr: "T" + } + input_arg { + name: "beta2" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } attr { @@ -21523,29 +25590,77 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: 
DT_QINT32 type: DT_HALF } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + attr { + name: "use_nesterov" + type: "bool" + default_value { + b: false + } + } + is_stateful: true } op { - name: "ReluGrad" + name: "ResourceApplyAdam" input_arg { - name: "gradients" + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "m" + type: DT_RESOURCE + } + input_arg { + name: "v" + type: DT_RESOURCE + } + input_arg { + name: "beta1_power" type_attr: "T" } input_arg { - name: "features" + name: "beta2_power" type_attr: "T" } - output_arg { - name: "backprops" + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "beta1" + type_attr: "T" + } + input_arg { + name: "beta2" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } attr { @@ -21555,345 +25670,463 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } -} -op { - name: "RemoteCall" - input_arg { - name: "target" - type: DT_STRING - } - input_arg { - name: "args" - type_list_attr: "Tin" - } - output_arg { - name: "output" - type_list_attr: "Tout" - } - attr { - name: "Tin" - type: "list(type)" - has_minimum: true - minimum: 1 - } attr { - name: "Tout" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "use_locking" + type: "bool" + default_value { + b: false + } } attr { - name: "f" - type: "func" + name: "use_nesterov" + type: "bool" + default_value { + b: false + } } + is_stateful: true } op { - name: "RemoteFusedGraphExecute" + name: "ResourceApplyCenteredRMSProp" input_arg { - name: "inputs" - type_list_attr: "Tinputs" + name: "var" + type: DT_RESOURCE } - output_arg { - name: "outputs" - type_list_attr: "Toutputs" 
+ input_arg { + name: "mg" + type: DT_RESOURCE } - attr { - name: "Tinputs" - type: "list(type)" - has_minimum: true + input_arg { + name: "ms" + type: DT_RESOURCE } - attr { - name: "Toutputs" - type: "list(type)" - has_minimum: true + input_arg { + name: "mom" + type: DT_RESOURCE } - attr { - name: "serialized_remote_fused_graph_execute_info" - type: "string" + input_arg { + name: "lr" + type_attr: "T" } -} -op { - name: "RepeatDataset" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "rho" + type_attr: "T" + } + input_arg { + name: "momentum" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" } input_arg { - name: "count" - type: DT_INT64 - } - output_arg { - name: "handle" - type: DT_VARIANT + name: "grad" + type_attr: "T" } attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "use_locking" + type: "bool" + default_value { + b: false + } } is_stateful: true } op { - name: "RepeatDataset" + name: "ResourceApplyCenteredRMSProp" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "var" + type: DT_RESOURCE } input_arg { - name: "count" - type: DT_INT64 - } - output_arg { - name: "handle" - type: DT_VARIANT + name: "mg" + type: DT_RESOURCE } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + input_arg { + name: "ms" + type: DT_RESOURCE } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + input_arg { + name: "mom" + type: DT_RESOURCE } -} -op { - name: "RequantizationRange" input_arg { - name: "input" - type_attr: 
"Tinput" + name: "lr" + type_attr: "T" } input_arg { - name: "input_min" - type: DT_FLOAT + name: "rho" + type_attr: "T" } input_arg { - name: "input_max" - type: DT_FLOAT + name: "momentum" + type_attr: "T" } - output_arg { - name: "output_min" - type: DT_FLOAT + input_arg { + name: "epsilon" + type_attr: "T" } - output_arg { - name: "output_max" - type: DT_FLOAT + input_arg { + name: "grad" + type_attr: "T" } attr { - name: "Tinput" + name: "T" type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 type: DT_QINT8 type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + is_stateful: true } op { - name: "Requantize" + name: "ResourceApplyFtrl" input_arg { - name: "input" - type_attr: "Tinput" + name: "var" + type: DT_RESOURCE } input_arg { - name: "input_min" - type: DT_FLOAT + name: "accum" + type: DT_RESOURCE } input_arg { - name: "input_max" - type: DT_FLOAT + name: "linear" + type: DT_RESOURCE } input_arg { - name: "requested_output_min" - type: DT_FLOAT + name: "grad" + type_attr: "T" } input_arg { - name: "requested_output_max" - type: DT_FLOAT + name: "lr" + type_attr: "T" } - output_arg { - name: "output" - type_attr: "out_type" + input_arg { + name: "l1" + type_attr: "T" } - output_arg { - name: "output_min" - type: DT_FLOAT + input_arg { + name: "l2" + type_attr: "T" } - output_arg { - name: "output_max" - type: DT_FLOAT + input_arg { + name: "lr_power" + type_attr: "T" } attr { - name: "Tinput" + name: "T" type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 type: DT_QINT8 type: 
DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 type: DT_QINT32 + type: DT_HALF } } } attr { - name: "out_type" - type: "type" - allowed_values { - list { - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT16 - type: DT_QUINT16 - type: DT_QINT32 - } + name: "use_locking" + type: "bool" + default_value { + b: false } } + is_stateful: true } op { - name: "Reshape" + name: "ResourceApplyFtrl" input_arg { - name: "tensor" + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "linear" + type: DT_RESOURCE + } + input_arg { + name: "grad" type_attr: "T" } input_arg { - name: "shape" - type_attr: "Tshape" + name: "lr" + type_attr: "T" } - output_arg { - name: "output" + input_arg { + name: "l1" type_attr: "T" } - attr { - name: "T" - type: "type" + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "lr_power" + type_attr: "T" } attr { - name: "Tshape" + name: "T" type: "type" - default_value { - type: DT_INT32 - } allowed_values { list { - type: DT_INT32 + type: DT_FLOAT + type: DT_DOUBLE type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + is_stateful: true } op { - name: "ResizeArea" + name: "ResourceApplyFtrlV2" input_arg { - name: "images" + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "linear" + type: DT_RESOURCE + } + input_arg { + name: "grad" type_attr: "T" } input_arg { - name: "size" - type: DT_INT32 + name: "lr" + type_attr: "T" } - output_arg { - name: "resized_images" - type: DT_FLOAT + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "l2_shrinkage" + type_attr: "T" + } + 
input_arg { + name: "lr_power" + type_attr: "T" } attr { name: "T" type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 type: DT_UINT8 - type: DT_INT8 + type: DT_UINT16 type: DT_INT16 - type: DT_INT32 - type: DT_INT64 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE } } } attr { - name: "align_corners" + name: "use_locking" type: "bool" default_value { b: false } } + is_stateful: true } op { - name: "ResizeBicubic" + name: "ResourceApplyFtrlV2" input_arg { - name: "images" + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "linear" + type: DT_RESOURCE + } + input_arg { + name: "grad" type_attr: "T" } input_arg { - name: "size" - type: DT_INT32 + name: "lr" + type_attr: "T" } - output_arg { - name: "resized_images" - type: DT_FLOAT + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "l2_shrinkage" + type_attr: "T" + } + input_arg { + name: "lr_power" + type_attr: "T" } attr { name: "T" type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 type: DT_UINT8 - type: DT_INT8 + type: DT_UINT16 type: DT_INT16 - type: DT_INT32 - type: DT_INT64 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "align_corners" + name: "use_locking" type: "bool" default_value { b: false } } + is_stateful: true } op { - name: "ResizeBicubicGrad" + name: "ResourceApplyGradientDescent" input_arg { - name: "grads" - type: DT_FLOAT + name: "var" + type: DT_RESOURCE } input_arg { - name: "original_image" + name: "alpha" type_attr: "T" } - output_arg { - name: "output" + input_arg { + name: 
"delta" type_attr: "T" } attr { @@ -21903,67 +26136,97 @@ op { list { type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF } } } attr { - name: "align_corners" + name: "use_locking" type: "bool" default_value { b: false } } + is_stateful: true } op { - name: "ResizeBilinear" + name: "ResourceApplyGradientDescent" input_arg { - name: "images" - type_attr: "T" + name: "var" + type: DT_RESOURCE } input_arg { - name: "size" - type: DT_INT32 + name: "alpha" + type_attr: "T" } - output_arg { - name: "resized_images" - type: DT_FLOAT + input_arg { + name: "delta" + type_attr: "T" } attr { name: "T" type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 type: DT_UINT8 - type: DT_INT8 + type: DT_UINT16 type: DT_INT16 - type: DT_INT32 - type: DT_INT64 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "align_corners" + name: "use_locking" type: "bool" default_value { b: false } } + is_stateful: true } op { - name: "ResizeBilinearGrad" + name: "ResourceApplyMomentum" input_arg { - name: "grads" - type: DT_FLOAT + name: "var" + type: DT_RESOURCE } input_arg { - name: "original_image" + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "lr" type_attr: "T" } - output_arg { - name: "output" + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "momentum" type_attr: "T" } attr { @@ -21972,31 +26235,58 @@ op { allowed_values { list { type: DT_FLOAT - type: DT_HALF type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: 
DT_QUINT8 + type: DT_QINT32 + type: DT_HALF } } } attr { - name: "align_corners" + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + attr { + name: "use_nesterov" type: "bool" default_value { b: false } } + is_stateful: true } op { - name: "ResizeNearestNeighbor" + name: "ResourceApplyMomentum" input_arg { - name: "images" + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "lr" type_attr: "T" } input_arg { - name: "size" - type: DT_INT32 + name: "grad" + type_attr: "T" } - output_arg { - name: "resized_images" + input_arg { + name: "momentum" type_attr: "T" } attr { @@ -22004,37 +26294,65 @@ op { type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 type: DT_UINT8 - type: DT_INT8 + type: DT_UINT16 type: DT_INT16 - type: DT_INT32 - type: DT_INT64 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "align_corners" + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + attr { + name: "use_nesterov" type: "bool" default_value { b: false } } + is_stateful: true } op { - name: "ResizeNearestNeighborGrad" + name: "ResourceApplyProximalAdagrad" input_arg { - name: "grads" + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "lr" type_attr: "T" } input_arg { - name: "size" - type: DT_INT32 + name: "l1" + type_attr: "T" } - output_arg { - name: "output" + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } attr { @@ -22042,25 +26360,34 @@ op { type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 type: DT_INT8 - type: DT_INT32 + type: DT_COMPLEX64 + type: 
DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE } } } attr { - name: "align_corners" + name: "use_locking" type: "bool" default_value { b: false } } + is_stateful: true } op { - name: "ResourceApplyAdadelta" + name: "ResourceApplyProximalAdagrad" input_arg { name: "var" type: DT_RESOURCE @@ -22069,20 +26396,16 @@ op { name: "accum" type: DT_RESOURCE } - input_arg { - name: "accum_update" - type: DT_RESOURCE - } input_arg { name: "lr" type_attr: "T" } input_arg { - name: "rho" + name: "l1" type_attr: "T" } input_arg { - name: "epsilon" + name: "l2" type_attr: "T" } input_arg { @@ -22108,6 +26431,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -22121,21 +26446,25 @@ op { is_stateful: true } op { - name: "ResourceApplyAdagrad" + name: "ResourceApplyProximalGradientDescent" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "accum" - type: DT_RESOURCE + name: "alpha" + type_attr: "T" } input_arg { - name: "lr" + name: "l1" type_attr: "T" } input_arg { - name: "grad" + name: "l2" + type_attr: "T" + } + input_arg { + name: "delta" type_attr: "T" } attr { @@ -22170,25 +26499,13 @@ op { is_stateful: true } op { - name: "ResourceApplyAdagradDA" + name: "ResourceApplyProximalGradientDescent" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "gradient_accumulator" - type: DT_RESOURCE - } - input_arg { - name: "gradient_squared_accumulator" - type: DT_RESOURCE - } - input_arg { - name: "grad" - type_attr: "T" - } - input_arg { - name: "lr" + name: "alpha" type_attr: "T" } input_arg { @@ -22200,8 +26517,8 @@ op { type_attr: "T" } input_arg { - name: "global_step" - type: DT_INT64 + name: "delta" + type_attr: "T" } attr { name: "T" @@ -22222,6 +26539,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -22235,37 +26554,29 @@ op { is_stateful: true } op { - name: "ResourceApplyAdam" + 
name: "ResourceApplyRMSProp" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "m" + name: "ms" type: DT_RESOURCE } input_arg { - name: "v" + name: "mom" type: DT_RESOURCE } - input_arg { - name: "beta1_power" - type_attr: "T" - } - input_arg { - name: "beta2_power" - type_attr: "T" - } input_arg { name: "lr" type_attr: "T" } input_arg { - name: "beta1" + name: "rho" type_attr: "T" } input_arg { - name: "beta2" + name: "momentum" type_attr: "T" } input_arg { @@ -22308,37 +26619,29 @@ op { is_stateful: true } op { - name: "ResourceApplyAdam" + name: "ResourceApplyRMSProp" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "m" + name: "ms" type: DT_RESOURCE } input_arg { - name: "v" + name: "mom" type: DT_RESOURCE } - input_arg { - name: "beta1_power" - type_attr: "T" - } - input_arg { - name: "beta2_power" - type_attr: "T" - } input_arg { name: "lr" type_attr: "T" } input_arg { - name: "beta1" + name: "rho" type_attr: "T" } input_arg { - name: "beta2" + name: "momentum" type_attr: "T" } input_arg { @@ -22368,6 +26671,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -22378,55 +26683,109 @@ op { b: false } } + is_stateful: true +} +op { + name: "ResourceGather" + input_arg { + name: "resource" + type: DT_RESOURCE + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + output_arg { + name: "output" + type_attr: "dtype" + } attr { - name: "use_nesterov" + name: "validate_indices" type: "bool" default_value { - b: false + b: true + } + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } } } is_stateful: true } op { - name: "ResourceApplyCenteredRMSProp" - input_arg { - name: "var" - type: DT_RESOURCE - } + name: "ResourceScatterAdd" input_arg { - name: "mg" + name: "resource" type: DT_RESOURCE } input_arg { - name: "ms" - type: DT_RESOURCE + name: "indices" + type_attr: "Tindices" } 
input_arg { - name: "mom" - type: DT_RESOURCE + name: "updates" + type_attr: "dtype" } - input_arg { - name: "lr" - type_attr: "T" + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } } - input_arg { - name: "rho" - type_attr: "T" + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } } + is_stateful: true +} +op { + name: "ResourceScatterAdd" input_arg { - name: "momentum" - type_attr: "T" + name: "resource" + type: DT_RESOURCE } input_arg { - name: "epsilon" - type_attr: "T" + name: "indices" + type_attr: "Tindices" } input_arg { - name: "grad" - type_attr: "T" + name: "updates" + type_attr: "dtype" } attr { - name: "T" + name: "dtype" type: "type" allowed_values { list { @@ -22444,20 +26803,25 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "use_locking" - type: "bool" - default_value { - b: false + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } } } is_stateful: true } op { - name: "ResourceApplyFtrl" + name: "ResourceSparseApplyAdadelta" input_arg { name: "var" type: DT_RESOURCE @@ -22467,28 +26831,28 @@ op { type: DT_RESOURCE } input_arg { - name: "linear" + name: "accum_update" type: DT_RESOURCE } input_arg { - name: "grad" + name: "lr" type_attr: "T" } input_arg { - name: "lr" + name: "rho" type_attr: "T" } input_arg { - name: "l1" + name: "epsilon" type_attr: "T" } input_arg { - name: "l2" + name: "grad" type_attr: "T" } input_arg { - name: "lr_power" - type_attr: "T" + name: "indices" + type_attr: "Tindices" } attr { name: "T" @@ -22512,6 +26876,16 @@ op { } } } + attr { + name: "Tindices" + type: "type" + allowed_values 
{ + list { + type: DT_INT32 + type: DT_INT64 + } + } + } attr { name: "use_locking" type: "bool" @@ -22522,7 +26896,7 @@ op { is_stateful: true } op { - name: "ResourceApplyFtrlV2" + name: "ResourceSparseApplyAdadelta" input_arg { name: "var" type: DT_RESOURCE @@ -22532,32 +26906,28 @@ op { type: DT_RESOURCE } input_arg { - name: "linear" + name: "accum_update" type: DT_RESOURCE } - input_arg { - name: "grad" - type_attr: "T" - } input_arg { name: "lr" type_attr: "T" } input_arg { - name: "l1" + name: "rho" type_attr: "T" } input_arg { - name: "l2" + name: "epsilon" type_attr: "T" } input_arg { - name: "l2_shrinkage" + name: "grad" type_attr: "T" } input_arg { - name: "lr_power" - type_attr: "T" + name: "indices" + type_attr: "Tindices" } attr { name: "T" @@ -22578,6 +26948,18 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 } } } @@ -22591,19 +26973,27 @@ op { is_stateful: true } op { - name: "ResourceApplyGradientDescent" + name: "ResourceSparseApplyAdagrad" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "alpha" + name: "accum" + type: DT_RESOURCE + } + input_arg { + name: "lr" type_attr: "T" } input_arg { - name: "delta" + name: "grad" type_attr: "T" } + input_arg { + name: "indices" + type_attr: "Tindices" + } attr { name: "T" type: "type" @@ -22626,6 +27016,16 @@ op { } } } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } attr { name: "use_locking" type: "bool" @@ -22636,7 +27036,7 @@ op { is_stateful: true } op { - name: "ResourceApplyMomentum" + name: "ResourceSparseApplyAdagrad" input_arg { name: "var" type: DT_RESOURCE @@ -22654,8 +27054,8 @@ op { type_attr: "T" } input_arg { - name: "momentum" - type_attr: "T" + name: "indices" + type_attr: "Tindices" } attr { name: "T" @@ -22676,18 +27076,23 @@ op { type: DT_QUINT8 
type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "use_locking" - type: "bool" - default_value { - b: false + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } } } attr { - name: "use_nesterov" + name: "use_locking" type: "bool" default_value { b: false @@ -22696,15 +27101,27 @@ op { is_stateful: true } op { - name: "ResourceApplyProximalAdagrad" + name: "ResourceSparseApplyAdagradDA" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "accum" + name: "gradient_accumulator" + type: DT_RESOURCE + } + input_arg { + name: "gradient_squared_accumulator" type: DT_RESOURCE } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "indices" + type_attr: "Tindices" + } input_arg { name: "lr" type_attr: "T" @@ -22718,8 +27135,8 @@ op { type_attr: "T" } input_arg { - name: "grad" - type_attr: "T" + name: "global_step" + type: DT_INT64 } attr { name: "T" @@ -22743,6 +27160,16 @@ op { } } } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } attr { name: "use_locking" type: "bool" @@ -22753,13 +27180,29 @@ op { is_stateful: true } op { - name: "ResourceApplyProximalGradientDescent" + name: "ResourceSparseApplyAdagradDA" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "alpha" + name: "gradient_accumulator" + type: DT_RESOURCE + } + input_arg { + name: "gradient_squared_accumulator" + type: DT_RESOURCE + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "lr" type_attr: "T" } input_arg { @@ -22771,8 +27214,8 @@ op { type_attr: "T" } input_arg { - name: "delta" - type_attr: "T" + name: "global_step" + type: DT_INT64 } attr { name: "T" @@ -22793,6 +27236,18 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + 
allowed_values { + list { + type: DT_INT32 + type: DT_INT64 } } } @@ -22806,11 +27261,15 @@ op { is_stateful: true } op { - name: "ResourceApplyRMSProp" + name: "ResourceSparseApplyCenteredRMSProp" input_arg { name: "var" type: DT_RESOURCE } + input_arg { + name: "mg" + type: DT_RESOURCE + } input_arg { name: "ms" type: DT_RESOURCE @@ -22839,6 +27298,10 @@ op { name: "grad" type_attr: "T" } + input_arg { + name: "indices" + type_attr: "Tindices" + } attr { name: "T" type: "type" @@ -22861,6 +27324,16 @@ op { } } } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } attr { name: "use_locking" type: "bool" @@ -22871,58 +27344,49 @@ op { is_stateful: true } op { - name: "ResourceGather" + name: "ResourceSparseApplyCenteredRMSProp" input_arg { - name: "resource" + name: "var" type: DT_RESOURCE } input_arg { - name: "indices" - type_attr: "Tindices" + name: "mg" + type: DT_RESOURCE } - output_arg { - name: "output" - type_attr: "dtype" + input_arg { + name: "ms" + type: DT_RESOURCE } - attr { - name: "validate_indices" - type: "bool" - default_value { - b: true - } + input_arg { + name: "mom" + type: DT_RESOURCE } - attr { - name: "dtype" - type: "type" + input_arg { + name: "lr" + type_attr: "T" } - attr { - name: "Tindices" - type: "type" - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } + input_arg { + name: "rho" + type_attr: "T" } - is_stateful: true -} -op { - name: "ResourceScatterAdd" input_arg { - name: "resource" - type: DT_RESOURCE + name: "momentum" + type_attr: "T" } input_arg { - name: "indices" - type_attr: "Tindices" + name: "epsilon" + type_attr: "T" } input_arg { - name: "updates" - type_attr: "dtype" + name: "grad" + type_attr: "T" + } + input_arg { + name: "indices" + type_attr: "Tindices" } attr { - name: "dtype" + name: "T" type: "type" allowed_values { list { @@ -22940,6 +27404,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: 
DT_UINT64 } } } @@ -22953,10 +27419,17 @@ op { } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } is_stateful: true } op { - name: "ResourceSparseApplyAdadelta" + name: "ResourceSparseApplyFtrl" input_arg { name: "var" type: DT_RESOURCE @@ -22966,28 +27439,32 @@ op { type: DT_RESOURCE } input_arg { - name: "accum_update" + name: "linear" type: DT_RESOURCE } input_arg { - name: "lr" + name: "grad" type_attr: "T" } input_arg { - name: "rho" + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "lr" type_attr: "T" } input_arg { - name: "epsilon" + name: "l1" type_attr: "T" } input_arg { - name: "grad" + name: "l2" type_attr: "T" } input_arg { - name: "indices" - type_attr: "Tindices" + name: "lr_power" + type_attr: "T" } attr { name: "T" @@ -23031,7 +27508,7 @@ op { is_stateful: true } op { - name: "ResourceSparseApplyAdagrad" + name: "ResourceSparseApplyFtrl" input_arg { name: "var" type: DT_RESOURCE @@ -23041,8 +27518,8 @@ op { type: DT_RESOURCE } input_arg { - name: "lr" - type_attr: "T" + name: "linear" + type: DT_RESOURCE } input_arg { name: "grad" @@ -23052,6 +27529,22 @@ op { name: "indices" type_attr: "Tindices" } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "lr_power" + type_attr: "T" + } attr { name: "T" type: "type" @@ -23071,6 +27564,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23094,17 +27589,17 @@ op { is_stateful: true } op { - name: "ResourceSparseApplyAdagradDA" + name: "ResourceSparseApplyFtrlV2" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "gradient_accumulator" + name: "accum" type: DT_RESOURCE } input_arg { - name: "gradient_squared_accumulator" + name: "linear" type: DT_RESOURCE } input_arg { @@ -23128,8 +27623,12 @@ op { type_attr: "T" } input_arg { - name: "global_step" - type: DT_INT64 + name: 
"l2_shrinkage" + type_attr: "T" + } + input_arg { + name: "lr_power" + type_attr: "T" } attr { name: "T" @@ -23173,46 +27672,46 @@ op { is_stateful: true } op { - name: "ResourceSparseApplyCenteredRMSProp" + name: "ResourceSparseApplyFtrlV2" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "mg" + name: "accum" type: DT_RESOURCE } input_arg { - name: "ms" + name: "linear" type: DT_RESOURCE } input_arg { - name: "mom" - type: DT_RESOURCE + name: "grad" + type_attr: "T" } input_arg { - name: "lr" - type_attr: "T" + name: "indices" + type_attr: "Tindices" } input_arg { - name: "rho" + name: "lr" type_attr: "T" } input_arg { - name: "momentum" + name: "l1" type_attr: "T" } input_arg { - name: "epsilon" + name: "l2" type_attr: "T" } input_arg { - name: "grad" + name: "l2_shrinkage" type_attr: "T" } input_arg { - name: "indices" - type_attr: "Tindices" + name: "lr_power" + type_attr: "T" } attr { name: "T" @@ -23233,6 +27732,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23256,7 +27757,7 @@ op { is_stateful: true } op { - name: "ResourceSparseApplyFtrl" + name: "ResourceSparseApplyMomentum" input_arg { name: "var" type: DT_RESOURCE @@ -23266,8 +27767,8 @@ op { type: DT_RESOURCE } input_arg { - name: "linear" - type: DT_RESOURCE + name: "lr" + type_attr: "T" } input_arg { name: "grad" @@ -23278,19 +27779,7 @@ op { type_attr: "Tindices" } input_arg { - name: "lr" - type_attr: "T" - } - input_arg { - name: "l1" - type_attr: "T" - } - input_arg { - name: "l2" - type_attr: "T" - } - input_arg { - name: "lr_power" + name: "momentum" type_attr: "T" } attr { @@ -23332,10 +27821,17 @@ op { b: false } } + attr { + name: "use_nesterov" + type: "bool" + default_value { + b: false + } + } is_stateful: true } op { - name: "ResourceSparseApplyFtrlV2" + name: "ResourceSparseApplyMomentum" input_arg { name: "var" type: DT_RESOURCE @@ -23345,8 +27841,8 @@ op { type: DT_RESOURCE } input_arg { - name: "linear" - type: 
DT_RESOURCE + name: "lr" + type_attr: "T" } input_arg { name: "grad" @@ -23356,6 +27852,70 @@ op { name: "indices" type_attr: "Tindices" } + input_arg { + name: "momentum" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + attr { + name: "use_nesterov" + type: "bool" + default_value { + b: false + } + } + is_stateful: true +} +op { + name: "ResourceSparseApplyProximalAdagrad" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "accum" + type: DT_RESOURCE + } input_arg { name: "lr" type_attr: "T" @@ -23369,12 +27929,12 @@ op { type_attr: "T" } input_arg { - name: "l2_shrinkage" + name: "grad" type_attr: "T" } input_arg { - name: "lr_power" - type_attr: "T" + name: "indices" + type_attr: "Tindices" } attr { name: "T" @@ -23418,7 +27978,7 @@ op { is_stateful: true } op { - name: "ResourceSparseApplyMomentum" + name: "ResourceSparseApplyProximalAdagrad" input_arg { name: "var" type: DT_RESOURCE @@ -23432,17 +27992,21 @@ op { type_attr: "T" } input_arg { - name: "grad" + name: "l1" type_attr: "T" } input_arg { - name: "indices" - type_attr: "Tindices" + name: "l2" + type_attr: "T" } input_arg { - name: "momentum" + name: "grad" type_attr: "T" } + input_arg { + name: "indices" + type_attr: "Tindices" + } attr { name: "T" type: "type" @@ -23462,6 +28026,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23482,8 +28048,68 @@ op { b: false } } + is_stateful: true +} +op { + name: 
"ResourceSparseApplyProximalGradientDescent" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "alpha" + type_attr: "T" + } + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "indices" + type_attr: "Tindices" + } attr { - name: "use_nesterov" + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" type: "bool" default_value { b: false @@ -23492,17 +28118,13 @@ op { is_stateful: true } op { - name: "ResourceSparseApplyProximalAdagrad" + name: "ResourceSparseApplyProximalGradientDescent" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "accum" - type: DT_RESOURCE - } - input_arg { - name: "lr" + name: "alpha" type_attr: "T" } input_arg { @@ -23540,6 +28162,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23563,21 +28187,33 @@ op { is_stateful: true } op { - name: "ResourceSparseApplyProximalGradientDescent" + name: "ResourceSparseApplyRMSProp" input_arg { name: "var" type: DT_RESOURCE } input_arg { - name: "alpha" + name: "ms" + type: DT_RESOURCE + } + input_arg { + name: "mom" + type: DT_RESOURCE + } + input_arg { + name: "lr" type_attr: "T" } input_arg { - name: "l1" + name: "rho" type_attr: "T" } input_arg { - name: "l2" + name: "momentum" + type_attr: "T" + } + input_arg { + name: "epsilon" type_attr: "T" } input_arg { @@ -23686,6 +28322,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -24743,6 +29381,40 @@ 
op { } } } +op { + name: "ScalarSummary" + input_arg { + name: "tags" + type: DT_STRING + } + input_arg { + name: "values" + type_attr: "T" + } + output_arg { + name: "summary" + type: DT_STRING + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "ScanDataset" input_arg { @@ -24849,6 +29521,68 @@ op { } } } +op { + name: "ScatterAdd" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} op { name: "ScatterDiv" input_arg { @@ -24910,11 +29644,349 @@ op { } } op { - name: "ScatterMul" + name: "ScatterDiv" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: 
DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "ScatterMul" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "ScatterMul" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "ScatterNd" + input_arg { + 
name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + input_arg { + name: "shape" + type_attr: "Tindices" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "ScatterNdAdd" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "ScatterNdAdd" + input_arg { + name: "ref" + type_attr: "T" + is_ref: true + } + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "updates" + type_attr: "T" + } + output_arg { + name: "output_ref" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: 
false + } + } +} +op { + name: "ScatterNdNonAliasingAdd" input_arg { - name: "ref" + name: "input" type_attr: "T" - is_ref: true } input_arg { name: "indices" @@ -24925,9 +29997,8 @@ op { type_attr: "T" } output_arg { - name: "output_ref" + name: "output" type_attr: "T" - is_ref: true } attr { name: "T" @@ -24961,16 +30032,13 @@ op { } } } - attr { - name: "use_locking" - type: "bool" - default_value { - b: false - } - } } op { - name: "ScatterNd" + name: "ScatterNdNonAliasingAdd" + input_arg { + name: "input" + type_attr: "T" + } input_arg { name: "indices" type_attr: "Tindices" @@ -24979,10 +30047,6 @@ op { name: "updates" type_attr: "T" } - input_arg { - name: "shape" - type_attr: "Tindices" - } output_arg { name: "output" type_attr: "T" @@ -24990,6 +30054,26 @@ op { attr { name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } } attr { name: "Tindices" @@ -25003,7 +30087,7 @@ op { } } op { - name: "ScatterNdAdd" + name: "ScatterNdSub" input_arg { name: "ref" type_attr: "T" @@ -25063,10 +30147,11 @@ op { } } op { - name: "ScatterNdNonAliasingAdd" + name: "ScatterNdSub" input_arg { - name: "input" + name: "ref" type_attr: "T" + is_ref: true } input_arg { name: "indices" @@ -25077,8 +30162,9 @@ op { type_attr: "T" } output_arg { - name: "output" + name: "output_ref" type_attr: "T" + is_ref: true } attr { name: "T" @@ -25099,6 +30185,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25112,9 +30200,16 @@ op { } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "ScatterNdSub" + name: "ScatterNdUpdate" input_arg { name: "ref" type_attr: "T" @@ -25136,24 +30231,6 @@ op { attr { name: 
"T" type: "type" - allowed_values { - list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT64 - type: DT_INT32 - type: DT_UINT8 - type: DT_UINT16 - type: DT_INT16 - type: DT_INT8 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 - type: DT_HALF - } - } } attr { name: "Tindices" @@ -25169,12 +30246,12 @@ op { name: "use_locking" type: "bool" default_value { - b: false + b: true } } } op { - name: "ScatterNdUpdate" + name: "ScatterSub" input_arg { name: "ref" type_attr: "T" @@ -25196,6 +30273,24 @@ op { attr { name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } } attr { name: "Tindices" @@ -25211,7 +30306,7 @@ op { name: "use_locking" type: "bool" default_value { - b: true + b: false } } } @@ -25254,6 +30349,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25440,36 +30537,250 @@ op { minimum: 1 } attr { - name: "num_inner_iterations" - type: "int" - has_minimum: true - minimum: 1 + name: "num_inner_iterations" + type: "int" + has_minimum: true + minimum: 1 + } +} +op { + name: "SdcaShrinkL1" + input_arg { + name: "weights" + type: DT_FLOAT + number_attr: "num_features" + is_ref: true + } + attr { + name: "num_features" + type: "int" + has_minimum: true + } + attr { + name: "l1" + type: "float" + } + attr { + name: "l2" + type: "float" + } +} +op { + name: "SegmentMax" + input_arg { + name: "data" + type_attr: "T" + } + input_arg { + name: "segment_ids" + type_attr: "Tindices" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + 
type: DT_UINT16 + type: DT_HALF + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "SegmentMax" + input_arg { + name: "data" + type_attr: "T" + } + input_arg { + name: "segment_ids" + type_attr: "Tindices" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "SegmentMean" + input_arg { + name: "data" + type_attr: "T" + } + input_arg { + name: "segment_ids" + type_attr: "Tindices" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "SegmentMean" + input_arg { + name: "data" + type_attr: "T" + } + input_arg { + name: "segment_ids" + type_attr: "Tindices" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } } } op { - name: "SdcaShrinkL1" + name: "SegmentMin" input_arg { - name: "weights" - type: DT_FLOAT - number_attr: "num_features" - is_ref: 
true + name: "data" + type_attr: "T" } - attr { - name: "num_features" - type: "int" - has_minimum: true + input_arg { + name: "segment_ids" + type_attr: "Tindices" + } + output_arg { + name: "output" + type_attr: "T" } attr { - name: "l1" - type: "float" + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } } attr { - name: "l2" - type: "float" + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } } } op { - name: "SegmentMax" + name: "SegmentMin" input_arg { name: "data" type_attr: "T" @@ -25496,6 +30807,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25511,7 +30824,7 @@ op { } } op { - name: "SegmentMean" + name: "SegmentProd" input_arg { name: "data" type_attr: "T" @@ -25531,12 +30844,17 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF } } @@ -25553,7 +30871,7 @@ op { } } op { - name: "SegmentMin" + name: "SegmentProd" input_arg { name: "data" type_attr: "T" @@ -25573,13 +30891,20 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25595,7 +30920,7 @@ op { } } op { - name: "SegmentProd" + name: "SegmentSum" input_arg { name: "data" type_attr: "T" @@ -25674,6 +30999,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -26163,13 +31490,436 @@ op { } } 
op { - name: "Sigmoid" + name: "Sigmoid" + input_arg { + name: "x" + type_attr: "T" + } + output_arg { + name: "y" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } +} +op { + name: "SigmoidGrad" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "z" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } +} +op { + name: "SigmoidGrad" + input_arg { + name: "y" + type_attr: "T" + } + input_arg { + name: "dy" + type_attr: "T" + } + output_arg { + name: "z" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } +} +op { + name: "Sign" + input_arg { + name: "x" + type_attr: "T" + } + output_arg { + name: "y" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } +} +op { + name: "Sin" + input_arg { + name: "x" + type_attr: "T" + } + output_arg { + name: "y" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } +} +op { + name: "Sinh" + input_arg { + name: "x" + type_attr: "T" + } + output_arg { + name: "y" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } +} +op { + name: "Size" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" 
+ type_attr: "out_type" + } + attr { + name: "T" + type: "type" + } + attr { + name: "out_type" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "SkipDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "count" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "SkipDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "count" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} +op { + name: "Skipgram" + output_arg { + name: "vocab_word" + type: DT_STRING + } + output_arg { + name: "vocab_freq" + type: DT_INT32 + } + output_arg { + name: "words_per_epoch" + type: DT_INT64 + } + output_arg { + name: "current_epoch" + type: DT_INT32 + } + output_arg { + name: "total_words_processed" + type: DT_INT64 + } + output_arg { + name: "examples" + type: DT_INT32 + } + output_arg { + name: "labels" + type: DT_INT32 + } + attr { + name: "filename" + type: "string" + } + attr { + name: "batch_size" + type: "int" + } + attr { + name: "window_size" + type: "int" + default_value { + i: 5 + } + } + attr { + name: "min_count" + type: "int" + default_value { + i: 5 + } + } + attr { + name: "subsample" + type: "float" + default_value { + f: 0.001 + } + } + deprecation { + version: 19 + } + is_stateful: true +} +op { + name: "Slice" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "begin" + type_attr: "Index" + } + input_arg { + name: "size" + 
type_attr: "Index" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + } + attr { + name: "Index" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "SloppyInterleaveDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "cycle_length" + type: DT_INT64 + } + input_arg { + name: "block_length" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "SloppyInterleaveDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "cycle_length" + type: DT_INT64 + } + input_arg { + name: "block_length" + type: DT_INT64 + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} +op { + name: "Softmax" input_arg { - name: "x" + name: "logits" type_attr: "T" } output_arg { - name: "y" + name: "softmax" type_attr: "T" } attr { @@ -26180,24 +31930,26 @@ op { type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 } } } } op { - name: "SigmoidGrad" + name: "SoftmaxCrossEntropyWithLogits" input_arg { - name: "x" + name: "features" type_attr: "T" } input_arg { - name: "y" + name: "labels" 
type_attr: "T" } output_arg { - name: "z" + name: "loss" + type_attr: "T" + } + output_arg { + name: "backprop" type_attr: "T" } attr { @@ -26208,24 +31960,46 @@ op { type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 } } } } op { - name: "SigmoidGrad" + name: "Softplus" input_arg { - name: "y" + name: "features" + type_attr: "T" + } + output_arg { + name: "activations" type_attr: "T" } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } + } +} +op { + name: "Softplus" input_arg { - name: "dy" + name: "features" type_attr: "T" } output_arg { - name: "z" + name: "activations" type_attr: "T" } attr { @@ -26233,23 +32007,33 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "Sign" + name: "SoftplusGrad" input_arg { - name: "x" + name: "gradients" + type_attr: "T" + } + input_arg { + name: "features" type_attr: "T" } output_arg { - name: "y" + name: "backprops" type_attr: "T" } attr { @@ -26257,25 +32041,61 @@ op { type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 type: DT_HALF + } + } + } +} +op { + name: "SoftplusGrad" + input_arg { + name: "gradients" + type_attr: "T" + } + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "backprops" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { type: DT_FLOAT type: DT_DOUBLE type: DT_INT32 type: DT_INT64 - type: DT_COMPLEX64 - type: DT_COMPLEX128 + type: DT_UINT8 + type: DT_INT16 + 
type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "Sin" + name: "Softsign" input_arg { - name: "x" + name: "features" type_attr: "T" } output_arg { - name: "y" + name: "activations" type_attr: "T" } attr { @@ -26283,23 +32103,61 @@ op { type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 type: DT_HALF + } + } + } +} +op { + name: "Softsign" + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "activations" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { type: DT_FLOAT type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "Sinh" + name: "SoftsignGrad" input_arg { - name: "x" + name: "gradients" + type_attr: "T" + } + input_arg { + name: "features" type_attr: "T" } output_arg { - name: "y" + name: "backprops" type_attr: "T" } attr { @@ -26307,31 +32165,73 @@ op { type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 type: DT_HALF + } + } + } +} +op { + name: "SoftsignGrad" + input_arg { + name: "gradients" + type_attr: "T" + } + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "backprops" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { type: DT_FLOAT type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "Size" + name: "SpaceToBatch" input_arg { name: "input" type_attr: "T" } + input_arg { + name: 
"paddings" + type_attr: "Tpaddings" + } output_arg { name: "output" - type_attr: "out_type" + type_attr: "T" } attr { name: "T" type: "type" } attr { - name: "out_type" + name: "Tpaddings" type: "type" default_value { type: DT_INT32 @@ -26343,256 +32243,506 @@ op { } } } + attr { + name: "block_size" + type: "int" + has_minimum: true + minimum: 2 + } } op { - name: "SkipDataset" + name: "SpaceToBatchND" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "input" + type_attr: "T" } input_arg { - name: "count" - type: DT_INT64 + name: "block_shape" + type_attr: "Tblock_shape" + } + input_arg { + name: "paddings" + type_attr: "Tpaddings" } output_arg { - name: "handle" - type: DT_VARIANT + name: "output" + type_attr: "T" } attr { - name: "output_types" - type: "list(type)" + name: "T" + type: "type" + } + attr { + name: "Tblock_shape" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tpaddings" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "SpaceToDepth" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + } + attr { + name: "block_size" + type: "int" has_minimum: true - minimum: 1 + minimum: 2 + } +} +op { + name: "SpaceToDepth" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" } attr { - name: "output_shapes" - type: "list(shape)" + name: "T" + type: "type" + } + attr { + name: "block_size" + type: "int" has_minimum: true - minimum: 1 + minimum: 2 + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + s: "NCHW_VECT_C" + } + } } - is_stateful: true } op { - name: "SkipDataset" + name: "SparseAccumulatorApplyGradient" input_arg { - name: 
"input_dataset" - type: DT_VARIANT + name: "handle" + type: DT_STRING + is_ref: true } input_arg { - name: "count" + name: "local_step" type: DT_INT64 } - output_arg { + input_arg { + name: "gradient_indices" + type: DT_INT64 + } + input_arg { + name: "gradient_values" + type_attr: "dtype" + } + input_arg { + name: "gradient_shape" + type: DT_INT64 + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "has_known_shape" + type: "bool" + } +} +op { + name: "SparseAccumulatorApplyGradient" + input_arg { name: "handle" - type: DT_VARIANT + type: DT_STRING + is_ref: true + } + input_arg { + name: "local_step" + type: DT_INT64 + } + input_arg { + name: "gradient_indices" + type: DT_INT64 + } + input_arg { + name: "gradient_values" + type_attr: "dtype" + } + input_arg { + name: "gradient_shape" + type: DT_INT64 } attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "has_known_shape" + type: "bool" } } op { - name: "Skipgram" - output_arg { - name: "vocab_word" + name: "SparseAccumulatorTakeGradient" + input_arg { + name: "handle" type: DT_STRING + is_ref: true } - output_arg { - name: "vocab_freq" + input_arg { + name: "num_required" type: DT_INT32 } output_arg { - name: "words_per_epoch" + name: "indices" type: 
DT_INT64 } output_arg { - name: "current_epoch" - type: DT_INT32 + name: "values" + type_attr: "dtype" } output_arg { - name: "total_words_processed" + name: "shape" type: DT_INT64 } - output_arg { - name: "examples" + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } +} +op { + name: "SparseAccumulatorTakeGradient" + input_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + input_arg { + name: "num_required" type: DT_INT32 } output_arg { - name: "labels" - type: DT_INT32 + name: "indices" + type: DT_INT64 } - attr { - name: "filename" - type: "string" + output_arg { + name: "values" + type_attr: "dtype" } - attr { - name: "batch_size" - type: "int" + output_arg { + name: "shape" + type: DT_INT64 } attr { - name: "window_size" - type: "int" - default_value { - i: 5 + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } } } - attr { - name: "min_count" - type: "int" - default_value { - i: 5 - } +} +op { + name: "SparseAdd" + input_arg { + name: "a_indices" + type: DT_INT64 } - attr { - name: "subsample" - type: "float" - default_value { - f: 0.001 - } + input_arg { + name: "a_values" + type_attr: "T" } - deprecation { - version: 19 + input_arg { + name: "a_shape" + type: DT_INT64 } - is_stateful: true -} -op { - name: "Slice" input_arg { - name: "input" + name: "b_indices" + type: DT_INT64 + } + input_arg { + name: "b_values" type_attr: "T" } input_arg { - name: "begin" - type_attr: "Index" + 
name: "b_shape" + type: DT_INT64 } input_arg { - name: "size" - type_attr: "Index" + name: "thresh" + type_attr: "Treal" } output_arg { - name: "output" + name: "sum_indices" + type: DT_INT64 + } + output_arg { + name: "sum_values" type_attr: "T" } + output_arg { + name: "sum_shape" + type: DT_INT64 + } attr { name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } } attr { - name: "Index" + name: "Treal" type: "type" allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE type: DT_INT32 type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF } } } } op { - name: "SloppyInterleaveDataset" + name: "SparseAdd" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "a_indices" + type: DT_INT64 } input_arg { - name: "other_arguments" - type_list_attr: "Targuments" + name: "a_values" + type_attr: "T" } input_arg { - name: "cycle_length" + name: "a_shape" type: DT_INT64 } input_arg { - name: "block_length" + name: "b_indices" + type: DT_INT64 + } + input_arg { + name: "b_values" + type_attr: "T" + } + input_arg { + name: "b_shape" type: DT_INT64 } + input_arg { + name: "thresh" + type_attr: "Treal" + } output_arg { - name: "handle" - type: DT_VARIANT + name: "sum_indices" + type: DT_INT64 } - attr { - name: "f" - type: "func" + output_arg { + name: "sum_values" + type_attr: "T" } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true + output_arg { + name: "sum_shape" + type: DT_INT64 } attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: 
DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "Treal" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } } - is_stateful: true } op { - name: "SloppyInterleaveDataset" + name: "SparseAddGrad" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "backprop_val_grad" + type_attr: "T" } input_arg { - name: "other_arguments" - type_list_attr: "Targuments" + name: "a_indices" + type: DT_INT64 } input_arg { - name: "cycle_length" + name: "b_indices" type: DT_INT64 } input_arg { - name: "block_length" + name: "sum_indices" type: DT_INT64 } output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "f" - type: "func" - } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } -} -op { - name: "Softmax" - input_arg { - name: "logits" + name: "a_val_grad" type_attr: "T" } output_arg { - name: "softmax" + name: "b_val_grad" type_attr: "T" } attr { @@ -26600,29 +32750,48 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF } } } } op { - name: "SoftmaxCrossEntropyWithLogits" + name: "SparseAddGrad" input_arg { - name: "features" + name: "backprop_val_grad" type_attr: "T" } input_arg { - name: "labels" - type_attr: 
"T" + name: "a_indices" + type: DT_INT64 + } + input_arg { + name: "b_indices" + type: DT_INT64 + } + input_arg { + name: "sum_indices" + type: DT_INT64 } output_arg { - name: "loss" + name: "a_val_grad" type_attr: "T" } output_arg { - name: "backprop" + name: "b_val_grad" type_attr: "T" } attr { @@ -26630,22 +32799,67 @@ op { type: "type" allowed_values { list { - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "Softplus" + name: "SparseApplyAdadelta" input_arg { - name: "features" + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "accum_update" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "rho" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } + input_arg { + name: "indices" + type_attr: "Tindices" + } output_arg { - name: "activations" + name: "out" type_attr: "T" + is_ref: true } attr { name: "T" @@ -26654,30 +32868,80 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF } } } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "SoftplusGrad" + name: "SparseApplyAdadelta" input_arg { - name: "gradients" + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "accum" type_attr: "T" 
+ is_ref: true } input_arg { - name: "features" + name: "accum_update" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "rho" type_attr: "T" } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "indices" + type_attr: "Tindices" + } output_arg { - name: "backprops" + name: "out" type_attr: "T" + is_ref: true } attr { name: "T" @@ -26686,26 +32950,69 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "Softsign" + name: "SparseApplyAdagrad" input_arg { - name: "features" + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "grad" type_attr: "T" } + input_arg { + name: "indices" + type_attr: "Tindices" + } output_arg { - name: "activations" + name: "out" type_attr: "T" + is_ref: true } attr { name: "T" @@ -26714,30 +33021,67 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF } } } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: 
"SoftsignGrad" + name: "SparseApplyAdagrad" input_arg { - name: "gradients" + name: "var" type_attr: "T" + is_ref: true } input_arg { - name: "features" + name: "accum" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" type_attr: "T" } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "indices" + type_attr: "Tindices" + } output_arg { - name: "backprops" + name: "out" type_attr: "T" + is_ref: true } attr { name: "T" @@ -26746,96 +33090,112 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "SpaceToBatch" + name: "SparseApplyAdagradDA" input_arg { - name: "input" + name: "var" type_attr: "T" + is_ref: true } input_arg { - name: "paddings" - type_attr: "Tpaddings" + name: "gradient_accumulator" + type_attr: "T" + is_ref: true } - output_arg { - name: "output" + input_arg { + name: "gradient_squared_accumulator" + type_attr: "T" + is_ref: true + } + input_arg { + name: "grad" type_attr: "T" } - attr { - name: "T" - type: "type" - } - attr { - name: "Tpaddings" - type: "type" - default_value { - type: DT_INT32 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } + input_arg { + name: "indices" + type_attr: "Tindices" } - attr { - name: "block_size" - type: "int" - has_minimum: true - minimum: 2 + input_arg { + name: "lr" + type_attr: "T" } -} -op { - name: "SpaceToBatchND" input_arg { - name: "input" + name: "l1" type_attr: "T" } input_arg { - name: "block_shape" - type_attr: "Tblock_shape" + name: "l2" + type_attr: "T" } input_arg 
{ - name: "paddings" - type_attr: "Tpaddings" + name: "global_step" + type: DT_INT64 } output_arg { - name: "output" + name: "out" type_attr: "T" + is_ref: true } attr { name: "T" type: "type" - } - attr { - name: "Tblock_shape" - type: "type" - default_value { - type: DT_INT32 - } allowed_values { list { - type: DT_INT32 + type: DT_FLOAT + type: DT_DOUBLE type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF } } } attr { - name: "Tpaddings" + name: "Tindices" type: "type" - default_value { - type: DT_INT32 - } allowed_values { list { type: DT_INT32 @@ -26843,88 +33203,62 @@ op { } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "SpaceToDepth" + name: "SparseApplyAdagradDA" input_arg { - name: "input" + name: "var" type_attr: "T" + is_ref: true } - output_arg { - name: "output" + input_arg { + name: "gradient_accumulator" type_attr: "T" + is_ref: true } - attr { - name: "T" - type: "type" - } - attr { - name: "block_size" - type: "int" - has_minimum: true - minimum: 2 - } -} -op { - name: "SpaceToDepth" input_arg { - name: "input" + name: "gradient_squared_accumulator" type_attr: "T" + is_ref: true } - output_arg { - name: "output" + input_arg { + name: "grad" type_attr: "T" } - attr { - name: "T" - type: "type" - } - attr { - name: "block_size" - type: "int" - has_minimum: true - minimum: 2 - } - attr { - name: "data_format" - type: "string" - default_value { - s: "NHWC" - } - allowed_values { - list { - s: "NHWC" - s: "NCHW" - s: "NCHW_VECT_C" - } - } - } -} -op { - name: "SparseAccumulatorApplyGradient" input_arg { - name: "handle" - type: DT_STRING - is_ref: true + name: "indices" + type_attr: "Tindices" } input_arg { - name: "local_step" - type: DT_INT64 + name: "lr" + type_attr: "T" } input_arg { - name: "gradient_indices" - type: DT_INT64 + name: 
"l1" + type_attr: "T" } input_arg { - name: "gradient_values" - type_attr: "dtype" + name: "l2" + type_attr: "T" } input_arg { - name: "gradient_shape" + name: "global_step" type: DT_INT64 } + output_arg { + name: "out" + type_attr: "T" + is_ref: true + } attr { - name: "dtype" + name: "T" type: "type" allowed_values { list { @@ -26942,39 +33276,82 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "has_known_shape" + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" type: "bool" + default_value { + b: false + } } } op { - name: "SparseAccumulatorTakeGradient" + name: "SparseApplyCenteredRMSProp" input_arg { - name: "handle" - type: DT_STRING + name: "var" + type_attr: "T" is_ref: true } input_arg { - name: "num_required" - type: DT_INT32 + name: "mg" + type_attr: "T" + is_ref: true } - output_arg { - name: "indices" - type: DT_INT64 + input_arg { + name: "ms" + type_attr: "T" + is_ref: true } - output_arg { - name: "values" - type_attr: "dtype" + input_arg { + name: "mom" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "rho" + type_attr: "T" + } + input_arg { + name: "momentum" + type_attr: "T" + } + input_arg { + name: "epsilon" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "indices" + type_attr: "Tindices" } output_arg { - name: "shape" - type: DT_INT64 + name: "out" + type_attr: "T" + is_ref: true } attr { - name: "dtype" + name: "T" type: "type" allowed_values { list { @@ -26995,48 +33372,74 @@ op { } } } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "SparseAdd" + name: "SparseApplyCenteredRMSProp" input_arg { - name: "a_indices" - type: DT_INT64 
+ name: "var" + type_attr: "T" + is_ref: true } input_arg { - name: "a_values" + name: "mg" type_attr: "T" + is_ref: true } input_arg { - name: "a_shape" - type: DT_INT64 + name: "ms" + type_attr: "T" + is_ref: true } input_arg { - name: "b_indices" - type: DT_INT64 + name: "mom" + type_attr: "T" + is_ref: true } input_arg { - name: "b_values" + name: "lr" type_attr: "T" } input_arg { - name: "b_shape" - type: DT_INT64 + name: "rho" + type_attr: "T" } input_arg { - name: "thresh" - type_attr: "Treal" + name: "momentum" + type_attr: "T" } - output_arg { - name: "sum_indices" - type: DT_INT64 + input_arg { + name: "epsilon" + type_attr: "T" } - output_arg { - name: "sum_values" + input_arg { + name: "grad" type_attr: "T" } + input_arg { + name: "indices" + type_attr: "Tindices" + } output_arg { - name: "sum_shape" - type: DT_INT64 + name: "out" + type_attr: "T" + is_ref: true } attr { name: "T" @@ -27057,52 +33460,74 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } attr { - name: "Treal" + name: "Tindices" type: "type" allowed_values { list { - type: DT_FLOAT - type: DT_DOUBLE type: DT_INT32 type: DT_INT64 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 - type: DT_UINT16 - type: DT_HALF } } } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "SparseAddGrad" + name: "SparseApplyFtrl" input_arg { - name: "backprop_val_grad" + name: "var" type_attr: "T" + is_ref: true } input_arg { - name: "a_indices" - type: DT_INT64 + name: "accum" + type_attr: "T" + is_ref: true } input_arg { - name: "b_indices" - type: DT_INT64 + name: "linear" + type_attr: "T" + is_ref: true } input_arg { - name: "sum_indices" - type: DT_INT64 + name: "grad" + type_attr: "T" } - output_arg { - name: "a_val_grad" + input_arg { + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + 
type_attr: "T" + } + input_arg { + name: "lr_power" type_attr: "T" } output_arg { - name: "b_val_grad" + name: "out" type_attr: "T" + is_ref: true } attr { name: "T" @@ -27126,9 +33551,26 @@ op { } } } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } } op { - name: "SparseApplyAdadelta" + name: "SparseApplyFtrl" input_arg { name: "var" type_attr: "T" @@ -27140,29 +33582,33 @@ op { is_ref: true } input_arg { - name: "accum_update" + name: "linear" type_attr: "T" is_ref: true } input_arg { - name: "lr" + name: "grad" type_attr: "T" } input_arg { - name: "rho" + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "lr" type_attr: "T" } input_arg { - name: "epsilon" + name: "l1" type_attr: "T" } input_arg { - name: "grad" + name: "l2" type_attr: "T" } input_arg { - name: "indices" - type_attr: "Tindices" + name: "lr_power" + type_attr: "T" } output_arg { name: "out" @@ -27188,6 +33634,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27210,7 +33658,7 @@ op { } } op { - name: "SparseApplyAdagrad" + name: "SparseApplyFtrlV2" input_arg { name: "var" type_attr: "T" @@ -27222,8 +33670,9 @@ op { is_ref: true } input_arg { - name: "lr" + name: "linear" type_attr: "T" + is_ref: true } input_arg { name: "grad" @@ -27233,6 +33682,26 @@ op { name: "indices" type_attr: "Tindices" } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "l1" + type_attr: "T" + } + input_arg { + name: "l2" + type_attr: "T" + } + input_arg { + name: "l2_shrinkage" + type_attr: "T" + } + input_arg { + name: "lr_power" + type_attr: "T" + } output_arg { name: "out" type_attr: "T" @@ -27279,19 +33748,19 @@ op { } } op { - name: "SparseApplyAdagradDA" + name: "SparseApplyFtrlV2" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "gradient_accumulator" + 
name: "accum" type_attr: "T" is_ref: true } input_arg { - name: "gradient_squared_accumulator" + name: "linear" type_attr: "T" is_ref: true } @@ -27316,8 +33785,12 @@ op { type_attr: "T" } input_arg { - name: "global_step" - type: DT_INT64 + name: "l2_shrinkage" + type_attr: "T" + } + input_arg { + name: "lr_power" + type_attr: "T" } output_arg { name: "out" @@ -27343,6 +33816,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27365,41 +33840,99 @@ op { } } op { - name: "SparseApplyCenteredRMSProp" + name: "SparseApplyMomentum" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "mg" + name: "accum" type_attr: "T" is_ref: true } input_arg { - name: "ms" + name: "lr" type_attr: "T" - is_ref: true } input_arg { - name: "mom" + name: "grad" type_attr: "T" - is_ref: true } input_arg { - name: "lr" + name: "indices" + type_attr: "Tindices" + } + input_arg { + name: "momentum" + type_attr: "T" + } + output_arg { + name: "out" type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + attr { + name: "use_nesterov" + type: "bool" + default_value { + b: false + } } +} +op { + name: "SparseApplyMomentum" input_arg { - name: "rho" + name: "var" type_attr: "T" + is_ref: true } input_arg { - name: "momentum" + name: "accum" type_attr: "T" + is_ref: true } input_arg { - name: "epsilon" + name: "lr" type_attr: "T" } input_arg { @@ -27410,6 +33943,10 @@ op { name: "indices" type_attr: "Tindices" } + input_arg { + 
name: "momentum" + type_attr: "T" + } output_arg { name: "out" type_attr: "T" @@ -27434,6 +33971,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27454,9 +33993,16 @@ op { b: false } } + attr { + name: "use_nesterov" + type: "bool" + default_value { + b: false + } + } } op { - name: "SparseApplyFtrl" + name: "SparseApplyProximalAdagrad" input_arg { name: "var" type_attr: "T" @@ -27467,19 +34013,6 @@ op { type_attr: "T" is_ref: true } - input_arg { - name: "linear" - type_attr: "T" - is_ref: true - } - input_arg { - name: "grad" - type_attr: "T" - } - input_arg { - name: "indices" - type_attr: "Tindices" - } input_arg { name: "lr" type_attr: "T" @@ -27493,9 +34026,13 @@ op { type_attr: "T" } input_arg { - name: "lr_power" + name: "grad" type_attr: "T" } + input_arg { + name: "indices" + type_attr: "Tindices" + } output_arg { name: "out" type_attr: "T" @@ -27542,7 +34079,7 @@ op { } } op { - name: "SparseApplyFtrlV2" + name: "SparseApplyProximalAdagrad" input_arg { name: "var" type_attr: "T" @@ -27553,19 +34090,6 @@ op { type_attr: "T" is_ref: true } - input_arg { - name: "linear" - type_attr: "T" - is_ref: true - } - input_arg { - name: "grad" - type_attr: "T" - } - input_arg { - name: "indices" - type_attr: "Tindices" - } input_arg { name: "lr" type_attr: "T" @@ -27579,12 +34103,12 @@ op { type_attr: "T" } input_arg { - name: "l2_shrinkage" + name: "grad" type_attr: "T" } input_arg { - name: "lr_power" - type_attr: "T" + name: "indices" + type_attr: "Tindices" } output_arg { name: "out" @@ -27610,6 +34134,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27632,19 +34158,22 @@ op { } } op { - name: "SparseApplyMomentum" + name: "SparseApplyProximalGradientDescent" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "accum" + name: "alpha" type_attr: "T" - is_ref: true } input_arg { - name: "lr" + name: "l1" + type_attr: "T" + } + input_arg { 
+ name: "l2" type_attr: "T" } input_arg { @@ -27655,10 +34184,6 @@ op { name: "indices" type_attr: "Tindices" } - input_arg { - name: "momentum" - type_attr: "T" - } output_arg { name: "out" type_attr: "T" @@ -27703,28 +34228,16 @@ op { b: false } } - attr { - name: "use_nesterov" - type: "bool" - default_value { - b: false - } - } } op { - name: "SparseApplyProximalAdagrad" + name: "SparseApplyProximalGradientDescent" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "accum" - type_attr: "T" - is_ref: true - } - input_arg { - name: "lr" + name: "alpha" type_attr: "T" } input_arg { @@ -27767,6 +34280,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27789,22 +34304,36 @@ op { } } op { - name: "SparseApplyProximalGradientDescent" + name: "SparseApplyRMSProp" input_arg { name: "var" type_attr: "T" is_ref: true } input_arg { - name: "alpha" + name: "ms" type_attr: "T" + is_ref: true } input_arg { - name: "l1" + name: "mom" type_attr: "T" + is_ref: true } input_arg { - name: "l2" + name: "lr" + type_attr: "T" + } + input_arg { + name: "rho" + type_attr: "T" + } + input_arg { + name: "momentum" + type_attr: "T" + } + input_arg { + name: "epsilon" type_attr: "T" } input_arg { @@ -27925,6 +34454,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -28039,6 +34570,57 @@ op { } is_stateful: true } +op { + name: "SparseConditionalAccumulator" + output_arg { + name: "handle" + type: DT_STRING + is_ref: true + } + attr { + name: "dtype" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "shape" + type: "shape" + } + attr { + name: "container" + type: 
"string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} op { name: "SparseCross" input_arg { @@ -28177,6 +34759,53 @@ op { } } } +op { + name: "SparseDenseCwiseAdd" + input_arg { + name: "sp_indices" + type: DT_INT64 + } + input_arg { + name: "sp_values" + type_attr: "T" + } + input_arg { + name: "sp_shape" + type: DT_INT64 + } + input_arg { + name: "dense" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "SparseDenseCwiseDiv" input_arg { @@ -28222,6 +34851,98 @@ op { } } } +op { + name: "SparseDenseCwiseDiv" + input_arg { + name: "sp_indices" + type: DT_INT64 + } + input_arg { + name: "sp_values" + type_attr: "T" + } + input_arg { + name: "sp_shape" + type: DT_INT64 + } + input_arg { + name: "dense" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} +op { + name: "SparseDenseCwiseMul" + input_arg { + name: "sp_indices" + type: DT_INT64 + } + input_arg { + name: "sp_values" + type_attr: "T" + } + input_arg { + name: "sp_shape" + type: DT_INT64 + } + input_arg { + name: "dense" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { 
+ list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } +} op { name: "SparseDenseCwiseMul" input_arg { @@ -28263,6 +34984,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -28434,19 +35157,232 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } + } +} +op { + name: "SparseReduceMax" + input_arg { + name: "input_indices" + type: DT_INT64 + } + input_arg { + name: "input_values" + type_attr: "T" + } + input_arg { + name: "input_shape" + type: DT_INT64 + } + input_arg { + name: "reduction_axes" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "keep_dims" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} +op { + name: "SparseReduceMaxSparse" + input_arg { + name: "input_indices" + type: DT_INT64 + } + input_arg { + name: "input_values" + type_attr: "T" + } + input_arg { + name: "input_shape" + type: DT_INT64 + } + input_arg { + name: "reduction_axes" + type: DT_INT32 + } + output_arg { + name: "output_indices" + type: DT_INT64 + } + output_arg { + name: "output_values" + type_attr: "T" + } + output_arg { + name: "output_shape" + type: DT_INT64 + } + attr { + name: "keep_dims" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: 
DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } + } +} +op { + name: "SparseReduceMaxSparse" + input_arg { + name: "input_indices" + type: DT_INT64 + } + input_arg { + name: "input_values" + type_attr: "T" + } + input_arg { + name: "input_shape" + type: DT_INT64 + } + input_arg { + name: "reduction_axes" + type: DT_INT32 + } + output_arg { + name: "output_indices" + type: DT_INT64 + } + output_arg { + name: "output_values" + type_attr: "T" + } + output_arg { + name: "output_shape" + type: DT_INT64 + } + attr { + name: "keep_dims" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} +op { + name: "SparseReduceSum" + input_arg { + name: "input_indices" + type: DT_INT64 + } + input_arg { + name: "input_values" + type_attr: "T" + } + input_arg { + name: "input_shape" + type: DT_INT64 + } + input_arg { + name: "reduction_axes" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "keep_dims" + type: "bool" + default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF } } } } op { - name: "SparseReduceMaxSparse" + name: "SparseReduceSum" input_arg { name: "input_indices" type: DT_INT64 @@ -28464,17 +35400,9 @@ op { type: DT_INT32 } output_arg { - name: "output_indices" - type: DT_INT64 - } - output_arg { - name: "output_values" + name: "output" type_attr: "T" } - output_arg { - name: "output_shape" - type: DT_INT64 - } attr { name: "keep_dims" 
type: "bool" @@ -28489,19 +35417,26 @@ op { list { type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 type: DT_INT64 + type: DT_INT32 type: DT_UINT8 + type: DT_UINT16 type: DT_INT16 type: DT_INT8 - type: DT_UINT16 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "SparseReduceSum" + name: "SparseReduceSumSparse" input_arg { name: "input_indices" type: DT_INT64 @@ -28519,9 +35454,17 @@ op { type: DT_INT32 } output_arg { - name: "output" + name: "output_indices" + type: DT_INT64 + } + output_arg { + name: "output_values" type_attr: "T" } + output_arg { + name: "output_shape" + type: DT_INT64 + } attr { name: "keep_dims" type: "bool" @@ -28608,6 +35551,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -28887,6 +35832,57 @@ op { } } } +op { + name: "SparseSegmentSum" + input_arg { + name: "data" + type_attr: "T" + } + input_arg { + name: "indices" + type_attr: "Tidx" + } + input_arg { + name: "segment_ids" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "SparseSlice" input_arg { @@ -29050,6 +36046,60 @@ op { } } } +op { + name: "SparseSparseMaximum" + input_arg { + name: "a_indices" + type: DT_INT64 + } + input_arg { + name: "a_values" + type_attr: "T" + } + input_arg { + name: "a_shape" + type: DT_INT64 + } + input_arg { + name: "b_indices" + type: DT_INT64 + } + input_arg { + name: "b_values" + type_attr: "T" + } + input_arg { + name: "b_shape" + type: 
DT_INT64 + } + output_arg { + name: "output_indices" + type: DT_INT64 + } + output_arg { + name: "output_values" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "SparseSparseMinimum" input_arg { @@ -29107,6 +36157,65 @@ op { } } } +op { + name: "SparseSparseMinimum" + input_arg { + name: "a_indices" + type: DT_INT64 + } + input_arg { + name: "a_values" + type_attr: "T" + } + input_arg { + name: "a_shape" + type: DT_INT64 + } + input_arg { + name: "b_indices" + type: DT_INT64 + } + input_arg { + name: "b_values" + type_attr: "T" + } + input_arg { + name: "b_shape" + type: DT_INT64 + } + output_arg { + name: "output_indices" + type: DT_INT64 + } + output_arg { + name: "output_values" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "SparseSplit" input_arg { @@ -29206,6 +36315,63 @@ op { } } } +op { + name: "SparseTensorDenseAdd" + input_arg { + name: "a_indices" + type_attr: "Tindices" + } + input_arg { + name: "a_values" + type_attr: "T" + } + input_arg { + name: "a_shape" + type_attr: "Tindices" + } + input_arg { + name: "b" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + 
type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "SparseTensorDenseMatMul" input_arg { @@ -30622,60 +37788,117 @@ op { } } op { - name: "Sub" + name: "Sub" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "z" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_UINT8 + type: DT_INT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } +} +op { + name: "Substr" input_arg { - name: "x" + name: "input" + type: DT_STRING + } + input_arg { + name: "pos" type_attr: "T" } input_arg { - name: "y" + name: "len" type_attr: "T" } output_arg { - name: "z" - type_attr: "T" + name: "output" + type: DT_STRING } attr { name: "T" type: "type" allowed_values { list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_UINT8 - type: DT_INT8 - type: DT_UINT16 - type: DT_INT16 type: DT_INT32 type: DT_INT64 - type: DT_COMPLEX64 - type: DT_COMPLEX128 } } } } op { - name: "Substr" + name: "Sum" input_arg { name: "input" - type: DT_STRING - } - input_arg { - name: "pos" type_attr: "T" } input_arg { - name: "len" - type_attr: "T" + name: "reduction_indices" + type_attr: "Tidx" } output_arg { name: "output" - type: DT_STRING + type_attr: "T" + } + attr { + name: "keep_dims" + type: "bool" + default_value { + b: false + } } attr { name: "T" type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "Tidx" + type: "type" + default_value { + 
type: DT_INT32 + } allowed_values { list { type: DT_INT32 @@ -30724,6 +37947,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -32420,6 +39645,98 @@ op { version: 7 } } +op { + name: "TopK" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "values" + type_attr: "T" + } + output_arg { + name: "indices" + type: DT_INT32 + } + attr { + name: "k" + type: "int" + has_minimum: true + } + attr { + name: "sorted" + type: "bool" + default_value { + b: true + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + deprecation { + version: 7 + } +} +op { + name: "TopKV2" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "k" + type: DT_INT32 + } + output_arg { + name: "values" + type_attr: "T" + } + output_arg { + name: "indices" + type: DT_INT32 + } + attr { + name: "sorted" + type: "bool" + default_value { + b: true + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_HALF + } + } + } +} op { name: "TopKV2" input_arg { @@ -32459,6 +39776,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -32856,6 +40175,105 @@ op { } } } +op { + name: "UnsortedSegmentMax" + input_arg { + name: "data" + type_attr: "T" + } + input_arg { + name: "segment_ids" + type_attr: "Tindices" + } + input_arg { + name: "num_segments" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 
+ type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "UnsortedSegmentSum" + input_arg { + name: "data" + type_attr: "T" + } + input_arg { + name: "segment_ids" + type_attr: "Tindices" + } + input_arg { + name: "num_segments" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + } + } + } + attr { + name: "Tindices" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} op { name: "UnsortedSegmentSum" input_arg { @@ -32893,6 +40311,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 88e57ea0cb..53d99178e5 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -82,6 +82,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -157,6 +159,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -334,6 +338,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 type: DT_VARIANT } } @@ -738,6 +744,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -801,6 +809,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -885,6 +895,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -978,6 +990,8 @@ op { type: DT_QUINT8 type: DT_QINT32 
type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1075,6 +1089,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1159,6 +1175,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1247,6 +1265,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1304,6 +1324,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1371,6 +1393,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1452,6 +1476,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1519,6 +1545,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1602,6 +1630,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1649,6 +1679,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1696,6 +1728,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -1762,6 +1796,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -2025,6 +2061,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -2098,6 +2136,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -3337,6 +3377,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -3428,6 +3470,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -3722,6 +3766,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -3773,6 +3819,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: 
DT_UINT64 } } } @@ -3829,6 +3877,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -4773,6 +4823,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -5723,6 +5775,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -5780,6 +5834,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -5850,6 +5906,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -7055,6 +7113,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -7122,6 +7182,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -7188,6 +7250,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -7960,6 +8024,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -9925,6 +9991,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -9959,6 +10027,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -10183,6 +10253,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -11460,6 +11532,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -11494,6 +11568,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -12969,6 +13045,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -13361,6 +13439,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -13441,6 +13521,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -13517,6 +13599,8 @@ op { type: 
DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -13593,6 +13677,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -13672,6 +13758,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -13748,6 +13836,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -13893,6 +13983,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -13974,6 +14066,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -14165,6 +14259,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -14424,6 +14520,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -16612,6 +16710,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -20165,6 +20265,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -20194,6 +20296,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -20230,6 +20334,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -20266,6 +20372,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -20864,6 +20972,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -20920,6 +21030,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -20996,6 +21108,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21081,6 +21195,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21169,6 +21285,8 @@ op { type: DT_QUINT8 type: DT_QINT32 
type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21245,6 +21363,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21325,6 +21445,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21376,6 +21498,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21436,6 +21560,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21510,6 +21636,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21571,6 +21699,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21646,6 +21776,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21736,6 +21868,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21813,6 +21947,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21883,6 +22019,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -21974,6 +22112,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -22068,6 +22208,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -22159,6 +22301,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -22254,6 +22398,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -22330,6 +22476,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -22419,6 +22567,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -22495,6 +22645,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: 
DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -22585,6 +22737,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23401,6 +23555,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23497,6 +23653,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23564,6 +23722,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23631,6 +23791,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23737,6 +23899,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23802,6 +23966,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23861,6 +24027,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -23977,6 +24145,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -24263,6 +24433,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -24309,6 +24481,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -24355,6 +24529,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -24406,6 +24582,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -24457,6 +24635,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25338,6 +25518,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25374,6 +25556,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25403,6 +25587,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: 
DT_UINT32 + type: DT_UINT64 } } } @@ -25439,6 +25625,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25626,6 +25814,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25685,6 +25875,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25759,6 +25951,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25776,6 +25970,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25833,6 +26029,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25908,6 +26106,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -25985,6 +26185,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -26084,6 +26286,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -26187,6 +26391,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -26286,6 +26492,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -26389,6 +26597,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -26472,6 +26682,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -26568,6 +26780,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -26650,6 +26864,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -26748,6 +26964,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -26853,6 +27071,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + 
type: DT_UINT64 } } } @@ -27032,6 +27252,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27084,6 +27306,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27136,6 +27360,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27339,6 +27565,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27401,6 +27629,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27461,6 +27691,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27528,6 +27760,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -27831,6 +28065,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -28038,6 +28274,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -28105,6 +28343,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -28209,6 +28449,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -29732,6 +29974,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -31500,6 +31744,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -31554,6 +31800,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -31927,6 +32175,8 @@ op { type: DT_INT8 type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } @@ -31982,6 +32232,8 @@ op { type: DT_QUINT8 type: DT_QINT32 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } -- GitLab From 091504af57f70df13ebf1db9946dc59482e1190a Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 10 Oct 
2017 10:29:43 -0700 Subject: [PATCH 223/909] Fix gradient behavior of fully dynamic tensor arrays + stop_gradients on tf.scan. Added a test checking that this fixes a bug with tf.stop_gradient of tf.scan output. PiperOrigin-RevId: 171697920 --- tensorflow/core/kernels/tensor_array.h | 49 +++++++++++++++++-- .../kernel_tests/functional_ops_test.py | 12 +++++ .../kernel_tests/tensor_array_ops_test.py | 8 ++- tensorflow/python/ops/functional_ops.py | 8 +-- tensorflow/python/ops/tensor_array_ops.py | 28 +++++------ 5 files changed, 80 insertions(+), 25 deletions(-) diff --git a/tensorflow/core/kernels/tensor_array.h b/tensorflow/core/kernels/tensor_array.h index b43fafe921..6882a8a0e5 100644 --- a/tensorflow/core/kernels/tensor_array.h +++ b/tensorflow/core/kernels/tensor_array.h @@ -460,8 +460,9 @@ Status TensorArray::LockedWriteOrAggregate(OpKernelContext* ctx, "TensorArray ", handle_.vec()(1), ": Could not write to TensorArray index ", index, " because the value shape is ", value_t->shape().DebugString(), - " which is incompatible with the TensorArray's element shape: ", - element_shape_.DebugString(), "."); + " which is incompatible with the TensorArray's inferred element " + "shape: ", + element_shape_.DebugString(), " (consider setting infer_shape=False)."); } if (t.read) { @@ -530,11 +531,53 @@ template Status TensorArray::LockedRead(OpKernelContext* ctx, const int32 index, PersistentTensor* value) { TF_RETURN_IF_ERROR(LockedReturnIfClosed()); - if (index < 0 || static_cast(index) >= tensors_.size()) { + if ((index < 0) || + (!is_grad_ && (static_cast(index) >= tensors_.size()))) { return errors::InvalidArgument("Tried to read from index ", index, " but array size is: ", tensors_.size()); } + size_t index_t = static_cast(index); + if (is_grad_ && (index_t >= tensors_.size() || !tensors_[index].written)) { + // Special case returning zeros if this is a gradient read that happens + // after a stop_gradients call with dynamic forward TensorArrays. 
+ // There is sometimes a race condition where the gradient is not + // written due to stop_gradients, but is later read. + TensorShape element_shape; + if (index_t < tensors_.size() && tensors_[index].shape.dims() > 0) { + element_shape = tensors_[index].shape; + } else if (!element_shape_.IsFullyDefined()) { + return errors::InvalidArgument( + "TensorArray ", handle_.vec()(1), + ": Could not read from gradient TensorArray index ", index, + ". Furthermore, the element shape is not fully defined: ", + element_shape_.DebugString(), + ". " + "It is likely you are working with a resizeable TensorArray and " + "stop_gradients " + "is not allowing the gradients to be written. If you set the full " + "element_shape " + "property on the forward TensorArray, the proper all-zeros tensor " + "will be " + "returned instead of incurring this error."); + } else { + DCHECK(element_shape_.AsTensorShape(&element_shape)); + } + if (index_t >= tensors_.size()) { + // Fill in tensors_ up to index to have known shape. 
+ size_t old_tensors_size = tensors_.size(); + tensors_.resize(index + 1); + for (size_t i = old_tensors_size; i < index + 1; ++i) { + tensors_[i].shape = element_shape; + tensors_[i].written = true; + } + } else { + tensors_[index].shape = element_shape; + tensors_[index].written = true; + } + } + TensorAndState& t = tensors_[index]; + if (!t.written) { return errors::InvalidArgument("TensorArray ", handle_.vec()(1), ": Could not read from TensorArray index ", diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py index 429b6c2e83..21fe588ac1 100644 --- a/tensorflow/python/kernel_tests/functional_ops_test.py +++ b/tensorflow/python/kernel_tests/functional_ops_test.py @@ -371,6 +371,18 @@ class FunctionalOpsTest(test.TestCase): r = gradients_impl.gradients(r, v)[0] self.assertAllEqual(873.0, r.eval()) + def testScanGradientWithPartStopGradient(self): + a = variables.Variable(0.0, name="a") + b = variables.Variable(0.0, name="b") + elems = array_ops.zeros(5) + l0, l1 = functional_ops.scan( + lambda elem_, input_: (a, b), elems, initializer=(0., 0.)) + loss = l0 + array_ops.stop_gradient(l1) + grad = gradients_impl.gradients(ys=[loss], xs=[a, b]) + with self.test_session(use_gpu=True) as sess: + variables.global_variables_initializer().run() + sess.run(grad) + def testFoldShape(self): with self.test_session(): x = constant_op.constant([[1, 2, 3], [4, 5, 6]]) diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py index cffedf63f7..fc4f9b22b9 100644 --- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py +++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py @@ -1066,7 +1066,10 @@ class TensorArrayTest(test.TestCase): infer_shape=True) w0 = ta1.split(value, [1, 2]) r0 = w0.read(0) - self.assertAllEqual(r0.get_shape(), tensor_shape.unknown_shape()) + self.assertEqual(r0.get_shape().ndims, None) + self.assertEqual( + 
tensor_shape.TensorShape( + ta1.handle.op.get_attr("element_shape")).ndims, None) def testWriteUnknownShape(self): with self.test_session(use_gpu=True): @@ -1142,10 +1145,11 @@ class TensorArrayTest(test.TestCase): # Don't actually perform the pack. This stores the static shape. ta.unstack(array_ops.zeros([0, 3, 5])).mark_used() packed = ta.stack() + concatenated = ta.concat() self.assertAllEqual([0, 3, 5], packed.eval().shape) # Concatenating zero tensors along their first dimension gives a # first dimension of zero - self.assertAllEqual([0, 5], ta.concat().eval().shape) + self.assertAllEqual([0, 5], concatenated.eval().shape) def testTensorArrayEvalEmptyWithDefault(self): self._testTensorArrayEvalEmptyWithDefault() diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index 413c29850e..96b799f610 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -545,9 +545,11 @@ def scan(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, # Create a tensor array to store the intermediate values. 
accs_ta = [ - tensor_array_ops.TensorArray(dtype=init.dtype, size=n, - dynamic_size=False, - infer_shape=infer_shape) + tensor_array_ops.TensorArray( + dtype=init.dtype, size=n, + element_shape=init.shape if infer_shape else None, + dynamic_size=False, + infer_shape=infer_shape) for init in a_flat] if initializer is None: diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py index 08325ba771..37b4b3bcf9 100644 --- a/tensorflow/python/ops/tensor_array_ops.py +++ b/tensorflow/python/ops/tensor_array_ops.py @@ -301,6 +301,8 @@ class TensorArray(object): """ with ops.name_scope(name, "TensorArrayWrite", [self._handle, index, value]): value = ops.convert_to_tensor(value, name="value") + if self._infer_shape: + self._merge_element_shape(value.shape) with self._maybe_colocate_with(value): flow_out = gen_data_flow_ops._tensor_array_write_v3( handle=self._handle, @@ -314,8 +316,6 @@ class TensorArray(object): ta._infer_shape = self._infer_shape ta._element_shape = self._element_shape ta._colocate_with = self._colocate_with - if ta._infer_shape: - ta._merge_element_shape(value.get_shape()) return ta def stack(self, name=None): @@ -433,6 +433,8 @@ class TensorArray(object): with ops.name_scope(name, "TensorArrayScatter", [self._handle, value, indices]): value = ops.convert_to_tensor(value, name="value") + if self._infer_shape and context.in_graph_mode(): + self._merge_element_shape(value.shape[1:]) with self._maybe_colocate_with(value): flow_out = gen_data_flow_ops._tensor_array_scatter_v3( handle=self._handle, @@ -446,12 +448,6 @@ class TensorArray(object): ta._infer_shape = self._infer_shape ta._element_shape = self._element_shape ta._colocate_with = self._colocate_with - if ta._infer_shape and context.in_graph_mode(): - val_shape = flow_out.op.inputs[2].get_shape() - element_shape = tensor_shape.unknown_shape() - if val_shape.dims is not None: - element_shape = tensor_shape.TensorShape(val_shape.dims[1:]) - 
ta._merge_element_shape(element_shape) return ta @tf_should_use.should_use_result @@ -476,6 +472,13 @@ class TensorArray(object): value = ops.convert_to_tensor(value, name="value") with self._maybe_colocate_with(value): lengths_64 = math_ops.to_int64(lengths) + if self._infer_shape and context.in_graph_mode(): + clengths = tensor_util.constant_value(lengths_64) + if value.shape.dims is not None: + if clengths is not None and clengths.max() == clengths.min(): + self._merge_element_shape( + tensor_shape.TensorShape([clengths[0]]).concatenate( + value.shape[1:])) flow_out = gen_data_flow_ops._tensor_array_split_v3( handle=self._handle, value=value, @@ -488,15 +491,6 @@ class TensorArray(object): ta._infer_shape = self._infer_shape ta._element_shape = self._element_shape ta._colocate_with = self._colocate_with - if ta._infer_shape and context.in_graph_mode(): - val_shape = flow_out.op.inputs[1].get_shape() - clengths = tensor_util.constant_value(flow_out.op.inputs[2]) - element_shape = tensor_shape.unknown_shape() - if val_shape.dims is not None: - if clengths is not None and clengths.max() == clengths.min(): - element_shape = tensor_shape.TensorShape([clengths[0]] + - val_shape.dims[1:]) - ta._merge_element_shape(element_shape) return ta def size(self, name=None): -- GitLab From a83154967bb2955acc234f4a64b63b505508b728 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 10:31:15 -0700 Subject: [PATCH 224/909] Improve Eager mode random numbers. 
PiperOrigin-RevId: 171698189 --- tensorflow/python/eager/context.py | 36 +++++++++++++++++++ tensorflow/python/framework/random_seed.py | 24 ++++++++++--- .../python/framework/random_seed_test.py | 11 +++++- tensorflow/python/framework/test_util.py | 2 +- .../kernel_tests/multinomial_op_test.py | 13 +++++-- 5 files changed, 77 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index be3d535271..996748a870 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -20,6 +20,7 @@ from __future__ import print_function import contextlib import copy +import random import threading from tensorflow.python import pywrap_tensorflow @@ -42,6 +43,8 @@ _default_mode = GRAPH_MODE # and the idempotent nature of writes to provide thread safety. _device_parsing_cache = {} +_MAXINT32 = 2**31 - 1 + # TODO(agarwal): better name ? class _EagerContext(threading.local): @@ -76,8 +79,26 @@ class Context(object): self._summary_writer_resource = None self._post_execution_callbacks = [] self._config = config + self._seed = None self._initialize_lock = threading.Lock() + def _set_global_seed(self, seed): + """Set a global eager mode seed for random ops.""" + self._seed = seed + self._rng = random.Random(self._seed) + + def _internal_operation_seed(self): + """Returns a fake operation seed. + + In eager mode, user shouldn't set or depend on operation seed. + Here, we generate a random seed based on global seed to make + operation's randomness different and depend on the global seed. + + Returns: + A fake operation seed based on global seed. 
+ """ + return self._rng.randint(0, _MAXINT32) + def _initialize_handle_and_devices(self): """Initialize handle and devices.""" with self._initialize_lock: @@ -326,6 +347,21 @@ def get_default_context(): return _context +def set_global_seed(seed): + """Sets the eager mode seed.""" + context()._set_global_seed(seed) # pylint: disable=protected-access + + +def global_seed(): + """Returns the eager mode seed.""" + return context()._seed # pylint: disable=protected-access + + +def internal_operation_seed(): + """Returns the operation seed generated based on global seed.""" + return context()._internal_operation_seed() # pylint: disable=protected-access + + def in_graph_mode(): """Returns True if current thread is in GRAPH mode for default context.""" return context().in_graph_mode() diff --git a/tensorflow/python/framework/random_seed.py b/tensorflow/python/framework/random_seed.py index 0d8bd4bcf1..5f1130570d 100644 --- a/tensorflow/python/framework/random_seed.py +++ b/tensorflow/python/framework/random_seed.py @@ -20,6 +20,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import context from tensorflow.python.framework import ops @@ -49,12 +50,22 @@ def get_seed(op_seed): A tuple of two integers that should be used for the local seed of this operation. 
""" - graph_seed = ops.get_default_graph().seed - if graph_seed is not None: + is_graph_mode = context.in_graph_mode() + + if is_graph_mode: + global_seed = ops.get_default_graph().seed + else: + global_seed = context.global_seed() + + if global_seed is not None: if op_seed is None: # pylint: disable=protected-access - op_seed = ops.get_default_graph()._last_id - seeds = _truncate_seed(graph_seed), _truncate_seed(op_seed) + if is_graph_mode: + op_seed = ops.get_default_graph()._last_id + else: + op_seed = context.internal_operation_seed() + + seeds = _truncate_seed(global_seed), _truncate_seed(op_seed) else: if op_seed is not None: seeds = DEFAULT_GRAPH_SEED, _truncate_seed(op_seed) @@ -162,4 +173,7 @@ def set_random_seed(seed): Args: seed: integer. """ - ops.get_default_graph().seed = seed + if context.in_graph_mode(): + ops.get_default_graph().seed = seed + else: + context.set_global_seed(seed) diff --git a/tensorflow/python/framework/random_seed_test.py b/tensorflow/python/framework/random_seed_test.py index c1d2b05b0b..b4c98ab8b2 100644 --- a/tensorflow/python/framework/random_seed_test.py +++ b/tensorflow/python/framework/random_seed_test.py @@ -18,12 +18,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import context from tensorflow.python.framework import random_seed +from tensorflow.python.framework import test_util from tensorflow.python.platform import test class RandomSeedTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def testRandomSeed(self): test_cases = [ # Each test case is a tuple with input to get_seed: @@ -32,12 +35,18 @@ class RandomSeedTest(test.TestCase): # (output_graph_seed, output_op_seed) ((None, None), (None, None)), ((None, 1), (random_seed.DEFAULT_GRAPH_SEED, 1)), - ((1, None), (1, 0)), # 0 will be the default_graph._lastid. 
((1, 1), (1, 1)), ((0, 0), (0, 2**31 - 1)), # Avoid nondeterministic (0, 0) output ((2**31 - 1, 0), (0, 2**31 - 1)), # Don't wrap to (0, 0) either ((0, 2**31 - 1), (0, 2**31 - 1)), # Wrapping for the other argument ] + if context.in_graph_mode(): + # 0 will be the default_graph._lastid. + test_cases.append(((1, None), (1, 0))) + else: + # operation seed is random number generated based on global seed. + # it's not tested due to possibility of platform or version difference. + pass for tc in test_cases: tinput, toutput = tc[0], tc[1] random_seed.set_random_seed(tinput[0]) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index ef733136f4..c681ffb514 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -519,7 +519,7 @@ class TensorFlowTestCase(googletest.TestCase): # cleared first. ops._default_graph_stack.reset() # pylint: disable=protected-access ops.reset_default_graph() - ops.get_default_graph().seed = random_seed.DEFAULT_GRAPH_SEED + random_seed.set_random_seed(random_seed.DEFAULT_GRAPH_SEED) def tearDown(self): for thread in self._threads: diff --git a/tensorflow/python/kernel_tests/multinomial_op_test.py b/tensorflow/python/kernel_tests/multinomial_op_test.py index d6e1b2b4c0..ca48ba6cad 100644 --- a/tensorflow/python/kernel_tests/multinomial_op_test.py +++ b/tensorflow/python/kernel_tests/multinomial_op_test.py @@ -25,9 +25,11 @@ import numpy as np from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import random_seed from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops @@ -52,13 +54,14 @@ native_sampler = 
random_ops.multinomial class MultinomialTest(test.TestCase): + @test_util.run_in_graph_and_eager_modes() def testSmallEntropy(self): random_seed.set_random_seed(1618) - with self.test_session(use_gpu=True): + with test_util.device(use_gpu=True): # A logit value of -10 corresponds to a probability of ~5e-5. logits = constant_op.constant([[-10., 10., -10.], [-10., -10., 10.]]) num_samples = 1000 - samples = random_ops.multinomial(logits, num_samples).eval() + samples = self.evaluate(random_ops.multinomial(logits, num_samples)) self.assertAllEqual([[1] * num_samples, [2] * num_samples], samples) def testOneOpMultipleStepsIndependent(self): @@ -69,6 +72,12 @@ class MultinomialTest(test.TestCase): sample1b = sess.run(sample_op1) self.assertFalse(np.equal(sample1a, sample1b).all()) + def testEagerOneOpMultipleStepsIndependent(self): + with context.eager_mode(), test_util.device(use_gpu=True): + sample1, sample2 = self._make_ops(10) + # Consecutive runs shouldn't yield identical output. + self.assertFalse(np.equal(sample1.numpy(), sample2.numpy()).all()) + def testTwoOpsIndependent(self): with self.test_session(use_gpu=True) as sess: sample_op1, sample_op2 = self._make_ops(32) -- GitLab From 1bd776c9c217474b07c29dcd9d8fbbb6eba93ea0 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 10 Oct 2017 10:45:09 -0700 Subject: [PATCH 225/909] Automated g4 rollback of changelist 170772848 PiperOrigin-RevId: 171700278 --- tensorflow/core/kernels/BUILD | 12 +- tensorflow/core/kernels/where_op.cc | 140 ++++++++----- tensorflow/core/kernels/where_op.h | 20 +- .../{where_op_gpu.cu.cc => where_op_gpu.cu.h} | 186 +++++++++++++----- .../core/kernels/where_op_gpu_impl_1.cu.cc | 18 ++ .../core/kernels/where_op_gpu_impl_2.cu.cc | 18 ++ .../core/kernels/where_op_gpu_impl_3.cu.cc | 18 ++ .../core/kernels/where_op_gpu_impl_4.cu.cc | 18 ++ .../core/kernels/where_op_gpu_impl_5.cu.cc | 18 ++ tensorflow/core/ops/array_ops.cc | 33 +++- tensorflow/python/kernel_tests/BUILD | 2 +- 
.../python/kernel_tests/where_op_test.py | 38 ++++ tensorflow/python/ops/array_ops.py | 4 +- 13 files changed, 422 insertions(+), 103 deletions(-) rename tensorflow/core/kernels/{where_op_gpu.cu.cc => where_op_gpu.cu.h} (53%) create mode 100644 tensorflow/core/kernels/where_op_gpu_impl_1.cu.cc create mode 100644 tensorflow/core/kernels/where_op_gpu_impl_2.cu.cc create mode 100644 tensorflow/core/kernels/where_op_gpu_impl_3.cu.cc create mode 100644 tensorflow/core/kernels/where_op_gpu_impl_4.cu.cc create mode 100644 tensorflow/core/kernels/where_op_gpu_impl_5.cu.cc diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index ad6f84304d..3b7d803bea 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -838,7 +838,17 @@ tf_kernel_library( tf_kernel_library( name = "where_op", - prefix = "where_op", + srcs = ["where_op.cc"], + hdrs = ["where_op.h"], + gpu_srcs = [ + "where_op.h", + "where_op_gpu.cu.h", + "where_op_gpu_impl_1.cu.cc", + "where_op_gpu_impl_2.cu.cc", + "where_op_gpu_impl_3.cu.cc", + "where_op_gpu_impl_4.cu.cc", + "where_op_gpu_impl_5.cu.cc", + ], deps = if_cuda([ ":cuda_solvers", "@cub_archive//:cub", diff --git a/tensorflow/core/kernels/where_op.cc b/tensorflow/core/kernels/where_op.cc index 59b474e41c..42d1365e64 100644 --- a/tensorflow/core/kernels/where_op.cc +++ b/tensorflow/core/kernels/where_op.cc @@ -52,19 +52,33 @@ typedef Eigen::GpuDevice GPUDevice; namespace functor { +namespace { +template +int64 CountAccumulator(const T* begin, const T* end) { + return std::accumulate(begin, end, 0L, [](int64 accum, const T& val) { + return accum + (val != T(0)); + }); +} + template <> -struct NumTrue { +int64 CountAccumulator(const bool* begin, const bool* end) { + return std::accumulate(begin, end, 0L); +} + +} // namespace + +template +struct NumTrue { static Status Compute(OpKernelContext* ctx, const CPUDevice& d, - TTypes::ConstFlat input, + typename TTypes::ConstFlat input, TTypes::Scalar num_true) { - 
*num_true.data() = - std::accumulate(input.data(), input.data() + input.size(), 0); + num_true() = CountAccumulator(input.data(), input.data() + input.size()); return Status::OK(); } }; -template -struct Where { +template +struct Where { EIGEN_ALWAYS_INLINE static void WriteIndexRowMajor( typename TTypes::Matrix output, const typename Eigen::DSizes& strides, TIndex true_n, @@ -77,7 +91,7 @@ struct Where { EIGEN_ALWAYS_INLINE static Status Compute( OpKernelContext* ctx, const CPUDevice& d, - typename TTypes::ConstTensor input, + typename TTypes::ConstTensor input, typename TTypes::Matrix output, TIndex* found_true) { Eigen::DSizes dims = input.dimensions(); Eigen::DSizes strides; @@ -93,7 +107,7 @@ struct Where { Eigen::DenseIndex output_size = output.dimension(0); for (Eigen::DenseIndex n = 0; n < input.size(); ++n) { - if (input.data()[n]) { + if (input.data()[n] != T(0)) { if (FastBoundsCheck(*found_true, output_size)) { WriteIndexRowMajor(output, strides, *found_true, n); } @@ -106,6 +120,7 @@ struct Where { } // namespace functor +template class WhereCPUOp : public OpKernel { public: explicit WhereCPUOp(OpKernelConstruction* context) : OpKernel(context) {} @@ -113,6 +128,12 @@ class WhereCPUOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); + OP_REQUIRES( + context, input.dtype() != DT_HALF, + errors::Unimplemented("No WhereOp available for float16/half type on " + "GPU; dying in CPU WhereOp to avoid silently " + "creating costly copies from device.")); + const int input_dims = input.dims(); Tensor num_true; @@ -120,8 +141,8 @@ class WhereCPUOp : public OpKernel { context, context->allocate_temp(DT_INT64, TensorShape({}), &num_true)); auto num_true_t = num_true.scalar(); - Status s = functor::NumTrue::Compute( - context, context->eigen_device(), input.flat(), + Status s = functor::NumTrue::Compute( + context, context->eigen_device(), input.flat(), num_true_t); OP_REQUIRES_OK(context, s); TensorShape 
output_shape({num_true_t(), input_dims}); @@ -134,12 +155,12 @@ class WhereCPUOp : public OpKernel { // separate threads below. int64 found_true = 0; -#define HANDLE_DIM(NDIM) \ - case NDIM: { \ - Status s = functor::Where::Compute( \ - context, context->eigen_device(), \ - input.tensor(), output->matrix(), &found_true); \ - OP_REQUIRES_OK(context, s); \ +#define HANDLE_DIM(NDIM) \ + case NDIM: { \ + Status s = functor::Where::Compute( \ + context, context->eigen_device(), input.tensor(), \ + output->matrix(), &found_true); \ + OP_REQUIRES_OK(context, s); \ } break; switch (input_dims) { @@ -169,44 +190,63 @@ class WhereCPUOp : public OpKernel { TF_DISALLOW_COPY_AND_ASSIGN(WhereCPUOp); }; -REGISTER_KERNEL_BUILDER(Name("Where").Device(DEVICE_CPU), WhereCPUOp); +#define REGISTER_WHERE_OP(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("Where").Device(DEVICE_CPU).TypeConstraint("T"), WhereCPUOp); + +TF_CALL_NUMBER_TYPES(REGISTER_WHERE_OP); +TF_CALL_bool(REGISTER_WHERE_OP); + +#undef REGISTER_WHERE_OP #if GOOGLE_CUDA namespace functor { -#define DECLARE_GPU_NUMTRUE(Tindex) \ - template <> \ - Status NumTrue::Compute( \ - OpKernelContext* ctx, const GPUDevice& d, TTypes::ConstFlat input, \ - TTypes::Scalar num_true); \ - extern template struct NumTrue +#define DECLARE_GPU_NUMTRUE(T, Tindex) \ + template <> \ + Status NumTrue::Compute( \ + OpKernelContext* ctx, const GPUDevice& d, TTypes::ConstFlat input, \ + TTypes::Scalar num_true); \ + extern template struct NumTrue -DECLARE_GPU_NUMTRUE(int32); -DECLARE_GPU_NUMTRUE(int64); +#define DECLARE_GPU_NUMTRUE_TYPE(T) \ + DECLARE_GPU_NUMTRUE(T, int32); \ + DECLARE_GPU_NUMTRUE(T, int64); + +TF_CALL_NUMBER_TYPES(DECLARE_GPU_NUMTRUE_TYPE); +TF_CALL_bool(DECLARE_GPU_NUMTRUE_TYPE); + +#undef DECLARE_GPU_NUMTRUE_TYPE #undef DECLARE_GPU_NUMTRUE -#define DECLARE_GPU_WHERE_INDEX(Dims, Tindex) \ +#define DECLARE_GPU_WHERE_INDEX(Dims, T, Tindex) \ template <> \ - Status Where::Compute( \ + Status Where::Compute( \ OpKernelContext* ctx, const 
GPUDevice& d, \ - typename TTypes::ConstTensor input, \ + typename TTypes::ConstTensor input, \ typename TTypes::Matrix output, Tindex* found_true); \ - extern template struct Where; -#define DECLARE_GPU_WHERE(Dims) \ - DECLARE_GPU_WHERE_INDEX(Dims, int32); \ - DECLARE_GPU_WHERE_INDEX(Dims, int64); - -DECLARE_GPU_WHERE(1); -DECLARE_GPU_WHERE(2); -DECLARE_GPU_WHERE(3); -DECLARE_GPU_WHERE(4); -DECLARE_GPU_WHERE(5); + extern template struct Where; +#define DECLARE_GPU_WHERE(Dims, T) \ + DECLARE_GPU_WHERE_INDEX(Dims, T, int32); \ + DECLARE_GPU_WHERE_INDEX(Dims, T, int64); + +#define DECLARE_GPU_WHERE_TYPES(T) \ + DECLARE_GPU_WHERE(1, T); \ + DECLARE_GPU_WHERE(2, T); \ + DECLARE_GPU_WHERE(3, T); \ + DECLARE_GPU_WHERE(4, T); \ + DECLARE_GPU_WHERE(5, T); + +TF_CALL_WHERE_GPU_TYPES(DECLARE_GPU_WHERE_TYPES); + +#undef DECLARE_GPU_WHERE_TYPES #undef DECLARE_GPU_WHERE #undef DECLARE_GPU_WHERE_INDEX } // namespace functor +template class WhereGPUOp : public AsyncOpKernel { public: explicit WhereGPUOp(OpKernelConstruction* context) : AsyncOpKernel(context) {} @@ -242,8 +282,8 @@ class WhereGPUOp : public AsyncOpKernel { static_cast(num_true_t.data())); // Push kernel to stream to get number of true elements. 
const GPUDevice& d = context->eigen_device(); - Status s = functor::NumTrue::Compute( - context, d, input.flat(), num_true_t); + Status s = functor::NumTrue::Compute( + context, d, input.flat(), num_true_t); OP_REQUIRES_OK_ASYNC(context, s, done); // Copy num_true to host; @@ -279,12 +319,12 @@ class WhereGPUOp : public AsyncOpKernel { 0, TensorShape({num_true, input_dims}), &output), done); -#define HANDLE_DIM(NDIM) \ - case NDIM: { \ - Status s = functor::Where::Compute( \ - context, d, input.tensor(), output->matrix(), \ - &found_true); \ - OP_REQUIRES_OK_ASYNC(context, s, done); \ +#define HANDLE_DIM(NDIM) \ + case NDIM: { \ + Status s = functor::Where::Compute( \ + context, d, input.tensor(), output->matrix(), \ + &found_true); \ + OP_REQUIRES_OK_ASYNC(context, s, done); \ } break; switch (input_dims) { @@ -324,7 +364,13 @@ class WhereGPUOp : public AsyncOpKernel { TF_DISALLOW_COPY_AND_ASSIGN(WhereGPUOp); }; -REGISTER_KERNEL_BUILDER(Name("Where").Device(DEVICE_GPU), WhereGPUOp); +#define REGISTER_GPU_WHERE_OP(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("Where").Device(DEVICE_GPU).TypeConstraint("T"), WhereGPUOp); + +TF_CALL_WHERE_GPU_TYPES(REGISTER_GPU_WHERE_OP); + +#undef REGISTER_GPU_WHERE_OP #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/where_op.h b/tensorflow/core/kernels/where_op.h index e040325e3d..d26849c8bd 100644 --- a/tensorflow/core/kernels/where_op.h +++ b/tensorflow/core/kernels/where_op.h @@ -24,16 +24,28 @@ limitations under the License. 
namespace tensorflow { +#define TF_CALL_WHERE_GPU_TYPES(m) \ + TF_CALL_int8(m); \ + TF_CALL_uint8(m); \ + TF_CALL_int32(m); \ + TF_CALL_int64(m); \ + TF_CALL_float(m); \ + TF_CALL_double(m); \ + TF_CALL_complex64(m); \ + TF_CALL_complex128(m); \ + TF_CALL_bool(m); + namespace functor { -template +template struct NumTrue { EIGEN_ALWAYS_INLINE static Status Compute( - OpKernelContext* ctx, const Device& d, TTypes::ConstFlat input, + OpKernelContext* ctx, const Device& d, + typename TTypes::ConstFlat input, typename TTypes::Scalar num_true); }; -template +template struct Where { // Copies indices of true values in input into output. The pointer // found_true should sit on the host. Compute should copy the @@ -43,7 +55,7 @@ struct Where { // the true values and the call to Where. EIGEN_ALWAYS_INLINE static Status Compute( OpKernelContext* ctx, const Device& d, - typename TTypes::ConstTensor input, + typename TTypes::ConstTensor input, typename TTypes::Matrix output, TIndex* found_true); }; diff --git a/tensorflow/core/kernels/where_op_gpu.cu.cc b/tensorflow/core/kernels/where_op_gpu.cu.h similarity index 53% rename from tensorflow/core/kernels/where_op_gpu.cu.cc rename to tensorflow/core/kernels/where_op_gpu.cu.h index c7c54ccbb4..ce8e435c95 100644 --- a/tensorflow/core/kernels/where_op_gpu.cu.cc +++ b/tensorflow/core/kernels/where_op_gpu.cu.h @@ -21,6 +21,8 @@ limitations under the License. 
#include "external/cub_archive/cub/device/device_reduce.cuh" #include "external/cub_archive/cub/device/device_select.cuh" #include "external/cub_archive/cub/iterator/counting_input_iterator.cuh" +#include "external/cub_archive/cub/iterator/transform_input_iterator.cuh" +#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/kernels/where_op.h" @@ -51,23 +53,103 @@ __global__ void PropagateWhereIndicesKernel( } } +namespace { + +template +struct IsNonzero { + EIGEN_DEVICE_FUNC IsNonzero() : zero(T(0)) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const T& x) const { + return (x != zero); + } + const T zero; +}; + +template +struct CubDeviceReduceCount { + cudaError_t operator()(void* d_temp_storage, size_t& temp_storage_bytes, + const T* d_in, TIndex* d_out, int num_items, + cudaStream_t stream = 0, + bool debug_synchronous = false) { + IsNonzero is_nonzero; + cub::TransformInputIterator, const T*> is_nonzero_iter( + d_in, is_nonzero); + return cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, + is_nonzero_iter, d_out, num_items, stream, + debug_synchronous); + } +}; + template -struct NumTrue { +struct CubDeviceReduceCount { + cudaError_t operator()(void* d_temp_storage, size_t& temp_storage_bytes, + const bool* d_in, TIndex* d_out, int num_items, + cudaStream_t stream = 0, + bool debug_synchronous = false) { + return cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, + d_out, num_items, stream, debug_synchronous); + } +}; + +template +struct CubDeviceSelectFlaggedCounter; + +template +struct CubDeviceSelectFlaggedCounter { + cudaError_t operator()(void* d_temp_storage, size_t& temp_storage_bytes, + const T* d_flags, OutputIterator d_out, + TIndex* d_num_selected_out, int num_items, + cudaStream_t stream = 0, + bool debug_synchronous = false) { + cub::CountingInputIterator select_counter(0); + IsNonzero 
is_nonzero; + cub::TransformInputIterator, const T*> is_nonzero_iter( + d_flags, is_nonzero); + return cub::DeviceSelect::Flagged( + d_temp_storage, temp_storage_bytes, select_counter /*d_in*/, + is_nonzero_iter /*d_flags*/, d_out, d_num_selected_out, num_items, + stream, debug_synchronous); + } +}; + +template +struct CubDeviceSelectFlaggedCounter { + cudaError_t operator()(void* d_temp_storage, size_t& temp_storage_bytes, + const T* d_flags, OutputIterator d_out, + TIndex* d_num_selected_out, int num_items, + cudaStream_t stream = 0, + bool debug_synchronous = false) { + cub::CountingInputIterator select_counter(0); + return cub::DeviceSelect::Flagged( + d_temp_storage, temp_storage_bytes, select_counter /*d_in*/, d_flags, + d_out, d_num_selected_out, num_items, stream, debug_synchronous); + } +}; + +} // namespace + +template +struct NumTrue { EIGEN_ALWAYS_INLINE static Status Compute( - OpKernelContext* ctx, const GPUDevice& d, TTypes::ConstFlat input, + OpKernelContext* ctx, const GPUDevice& d, + typename TTypes::ConstFlat input, typename TTypes::Scalar num_true) { const cudaStream_t& cu_stream = GetCudaStream(ctx); std::size_t temp_storage_bytes = 0; - const bool* input_data = input.data(); + const T* input_data = input.data(); TIndex* num_true_data = num_true.data(); - auto first_success = - cub::DeviceReduce::Sum(/*temp_storage*/ nullptr, temp_storage_bytes, - /*d_in*/ input_data, - /*d_out*/ num_true_data, - /*num_items*/ input.size(), - /*stream*/ cu_stream); + // TODO(ebrevdo): sum doesn't work; perhaps need a different + // iterator? 
+ auto reducer = CubDeviceReduceCount(); + auto first_success = reducer(/*temp_storage*/ nullptr, temp_storage_bytes, + /*d_in*/ input_data, + /*d_out*/ num_true_data, + /*num_items*/ input.size(), + /*stream*/ cu_stream); if (first_success != cudaSuccess) { return errors::Internal( @@ -81,7 +163,7 @@ struct NumTrue { DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), &temp_storage)); - auto second_success = cub::DeviceReduce::Sum( + auto second_success = reducer( /*temp_storage*/ temp_storage.flat().data(), temp_storage_bytes, /*d_in*/ input_data, /*d_out*/ num_true_data, @@ -91,7 +173,7 @@ struct NumTrue { if (second_success != cudaSuccess) { return errors::Internal( "WhereOp: Could not launch cub::DeviceReduce::Sum to count " - "number of true indices. temp_storage_bytes: ", + "number of true / nonzero indices. temp_storage_bytes: ", temp_storage_bytes, ", status: ", cudaGetErrorString(second_success)); } @@ -99,8 +181,20 @@ struct NumTrue { } }; -template struct NumTrue; -template struct NumTrue; +#define NUMTRUE_GPU_FUNCTOR(T) \ + template struct NumTrue; \ + template struct NumTrue; + +// We only need to declare the NumTrue functor once, but this file is +// included from where_op_gpu_impl_X.cu.cc for X=1,2,... +// Only declare for X = 1. 
+#if GPU_PROVIDED_DIM == 1 + +TF_CALL_WHERE_GPU_TYPES(NUMTRUE_GPU_FUNCTOR); + +#endif // GPU_PROVIDED_DIM == 1 + +#undef NUMTRUE_GPU_FUNCTOR template class WhereOutputIterator { @@ -143,9 +237,9 @@ class WhereOutputIterator { const Eigen::DenseIndex max_row_; }; -template +template Eigen::array CalculateStrides( - typename TTypes::ConstTensor input) { + typename TTypes::ConstTensor input) { const Eigen::DSizes dims = input.dimensions(); Eigen::array strides; EIGEN_STATIC_ASSERT((static_cast(decltype(input)::Layout) == @@ -158,12 +252,12 @@ Eigen::array CalculateStrides( return strides; } -template -struct Where { +template +struct Where { EIGEN_ALWAYS_INLINE static Status Compute( OpKernelContext* ctx, const GPUDevice& d, - typename TTypes::ConstTensor input, - typename TTypes::Matrix output, Tindex* found_true_host) { + typename TTypes::ConstTensor input, + typename TTypes::Matrix output, TIndex* found_true_host) { if (output.dimension(0) == 0) { // Nothing to do. return Status::OK(); @@ -173,25 +267,26 @@ struct Where { std::size_t temp_storage_bytes = 0; - cub::CountingInputIterator select_counter(0); - Tensor found_true_t; - TF_RETURN_IF_ERROR(ctx->allocate_temp(DataTypeToEnum::v(), + TF_RETURN_IF_ERROR(ctx->allocate_temp(DataTypeToEnum::v(), TensorShape({}), &found_true_t)); - Tindex* found_true_device = found_true_t.scalar().data(); + TIndex* found_true_device = found_true_t.scalar().data(); WhereOutputIterator output_iterator( output.data(), /* max_row */ output.dimension(0)); - auto first_success = - cub::DeviceSelect::Flagged(/*temp_storage*/ nullptr, temp_storage_bytes, - /*d_in*/ select_counter, - /*d_flags*/ input.data(), - /*d_out*/ output_iterator, - /*d_num_selected_out*/ found_true_device, - /*num_items*/ input.size(), - /*stream*/ cu_stream); + typedef std::decay DT; + CubDeviceSelectFlaggedCounter< + T, TIndex, typeof(output_iterator) /*OutputIterator*/, + std::is_convertible::value /*IsConvertibleToBool*/> + counter; + auto first_success = 
counter(/*temp_storage*/ nullptr, temp_storage_bytes, + /*d_flags*/ input.data(), + /*d_out*/ output_iterator, + /*d_num_selected_out*/ found_true_device, + /*num_items*/ input.size(), + /*stream*/ cu_stream); if (first_success != cudaSuccess) { return errors::Internal( "WhereOp: Could not launch cub::DeviceSelect::Flagged to calculate " @@ -204,9 +299,8 @@ struct Where { DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), &temp_storage)); - auto second_success = cub::DeviceSelect::Flagged( + auto second_success = counter( /*temp_storage*/ temp_storage.flat().data(), temp_storage_bytes, - /*d_in*/ select_counter, /*d_flags*/ input.data(), /*d_out*/ output_iterator, /*d_num_selected_out*/ found_true_device, @@ -223,11 +317,11 @@ struct Where { // TODO(ebrevdo): Find a way to synchronously copy back data from // found_true_device to *found_true_host. - const Eigen::array strides = - CalculateStrides(input); - const Tindex output_rows = output.dimension(0); + const Eigen::array strides = + CalculateStrides(input); + const TIndex output_rows = output.dimension(0); CudaLaunchConfig config = GetCudaLaunchConfig(output_rows, d); - PropagateWhereIndicesKernel + PropagateWhereIndicesKernel <<>>( output_rows, strides, output.data()); @@ -235,17 +329,14 @@ struct Where { } }; -#define DECLARE_GPU_SPEC_INDEX(Dims, Tindex) \ - template struct Where -#define DECLARE_GPU_SPEC(Dims) \ - DECLARE_GPU_SPEC_INDEX(Dims, int32); \ - DECLARE_GPU_SPEC_INDEX(Dims, int64) +#define DECLARE_GPU_SPEC_INDEX(Dims, T, TIndex) \ + template struct Where + +#define DECLARE_GPU_SPEC(T) \ + DECLARE_GPU_SPEC_INDEX(GPU_PROVIDED_DIM, T, int32); \ + DECLARE_GPU_SPEC_INDEX(GPU_PROVIDED_DIM, T, int64) -DECLARE_GPU_SPEC(1); -DECLARE_GPU_SPEC(2); -DECLARE_GPU_SPEC(3); -DECLARE_GPU_SPEC(4); -DECLARE_GPU_SPEC(5); +TF_CALL_WHERE_GPU_TYPES(DECLARE_GPU_SPEC); #undef DECLARE_GPU_SPEC #undef DECLARE_GPU_SPEC_INDEX @@ -253,4 +344,5 @@ DECLARE_GPU_SPEC(5); } // namespace functor } // namespace tensorflow + #endif 
// GOOGLE_CUDA diff --git a/tensorflow/core/kernels/where_op_gpu_impl_1.cu.cc b/tensorflow/core/kernels/where_op_gpu_impl_1.cu.cc new file mode 100644 index 0000000000..75ddfa76ea --- /dev/null +++ b/tensorflow/core/kernels/where_op_gpu_impl_1.cu.cc @@ -0,0 +1,18 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define GPU_PROVIDED_DIM 1 +#include "tensorflow/core/kernels/where_op_gpu.cu.h" +#undef GPU_PROVIDED_DIM diff --git a/tensorflow/core/kernels/where_op_gpu_impl_2.cu.cc b/tensorflow/core/kernels/where_op_gpu_impl_2.cu.cc new file mode 100644 index 0000000000..3a62259608 --- /dev/null +++ b/tensorflow/core/kernels/where_op_gpu_impl_2.cu.cc @@ -0,0 +1,18 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#define GPU_PROVIDED_DIM 2 +#include "tensorflow/core/kernels/where_op_gpu.cu.h" +#undef GPU_PROVIDED_DIM diff --git a/tensorflow/core/kernels/where_op_gpu_impl_3.cu.cc b/tensorflow/core/kernels/where_op_gpu_impl_3.cu.cc new file mode 100644 index 0000000000..2ae5447175 --- /dev/null +++ b/tensorflow/core/kernels/where_op_gpu_impl_3.cu.cc @@ -0,0 +1,18 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define GPU_PROVIDED_DIM 3 +#include "tensorflow/core/kernels/where_op_gpu.cu.h" +#undef GPU_PROVIDED_DIM diff --git a/tensorflow/core/kernels/where_op_gpu_impl_4.cu.cc b/tensorflow/core/kernels/where_op_gpu_impl_4.cu.cc new file mode 100644 index 0000000000..e976bb4331 --- /dev/null +++ b/tensorflow/core/kernels/where_op_gpu_impl_4.cu.cc @@ -0,0 +1,18 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define GPU_PROVIDED_DIM 4 +#include "tensorflow/core/kernels/where_op_gpu.cu.h" +#undef GPU_PROVIDED_DIM diff --git a/tensorflow/core/kernels/where_op_gpu_impl_5.cu.cc b/tensorflow/core/kernels/where_op_gpu_impl_5.cu.cc new file mode 100644 index 0000000000..ccbe2d6499 --- /dev/null +++ b/tensorflow/core/kernels/where_op_gpu_impl_5.cu.cc @@ -0,0 +1,18 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define GPU_PROVIDED_DIM 5 +#include "tensorflow/core/kernels/where_op_gpu.cu.h" +#undef GPU_PROVIDED_DIM diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index ad111fc6b8..fec27c7c1c 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -2715,14 +2715,15 @@ each repeated tile of `input` into `output`. 
// -------------------------------------------------------------------------- REGISTER_OP("Where") - .Input("input: bool") + .Input("input: T") + .Attr("T: {numbertype, bool} = DT_BOOL") .Output("index: int64") .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->Matrix(c->UnknownDim(), c->Rank(c->input(0)))); return Status::OK(); }) .Doc(R"doc( -Returns locations of true values in a boolean tensor. +Returns locations of nonzero / true values in a tensor. This operation returns the coordinates of true elements in `input`. The coordinates are returned in a 2-D tensor where the first dimension (rows) @@ -2749,6 +2750,34 @@ where(input) ==> [[0, 0], # [False, True]]] # 'input' has 5 true values, so output has 5 coordinates. # 'input' has rank of 3, so coordinates have three indices. +where(input) ==> [[0, 0, 0], + [0, 1, 0], + [1, 0, 1], + [1, 1, 1], + [2, 1, 1]] + +# `input` tensor is [[[1.5, 0.0] +# [-0.5, 0.0]] +# [[0.0, 0.25] +# [0.0, 0.75]] +# [[0.0, 0.0] +# [0.0, 0.01]]] +# 'input' has 5 nonzero values, so output has 5 coordinates. +# 'input' has rank of 3, so coordinates have three indices. +where(input) ==> [[0, 0, 0], + [0, 1, 0], + [1, 0, 1], + [1, 1, 1], + [2, 1, 1]] + +# `input` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] +# [0.0 + 0.5j, 0.0 + 0.0j]] +# [[0.0 + 0.0j, 0.25 + 1.5j] +# [0.0 + 0.0j, 0.75 + 0.0j]] +# [[0.0 + 0.0j, 0.0 + 0.0j] +# [0.0 + 0.0j, 0.01 + 0.0j]]] +# 'input' has 5 nonzero magnitude values, so output has 5 coordinates. +# 'input' has rank of 3, so coordinates have three indices. 
where(input) ==> [[0, 0, 0], [0, 1, 0], [1, 0, 1], diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 6f618217f5..206c6a5692 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -971,7 +971,7 @@ tf_py_test( cuda_py_test( name = "where_op_test", - size = "small", + size = "medium", srcs = ["where_op_test.py"], additional_deps = [ "//third_party/py/numpy", diff --git a/tensorflow/python/kernel_tests/where_op_test.py b/tensorflow/python/kernel_tests/where_op_test.py index 3e1fa0a287..17575da6f1 100644 --- a/tensorflow/python/kernel_tests/where_op_test.py +++ b/tensorflow/python/kernel_tests/where_op_test.py @@ -90,6 +90,44 @@ class WhereOpTest(test.TestCase): self._testWhere(x, truth) + def _testRandom(self, dtype, expected_err_re=None): + shape = [127, 33, 53] + x = np.random.randn(*shape) + 1j * np.random.randn(*shape) + x = (np.random.randn(*shape) > 0).astype(dtype) + truth = np.where(np.abs(x) > 0) # Tuples of indices by axis. + truth = np.vstack(truth).T # Convert to [num_true, indices]. 
+ self._testWhere(x, truth, expected_err_re) + + def testRandomBool(self): + self._testRandom(np.bool) + + def testRandomInt32(self): + self._testRandom(np.int32) + + def testRandomInt64(self): + self._testRandom(np.int64) + + def testRandomFloat(self): + self._testRandom(np.float32) + + def testRandomDouble(self): + self._testRandom(np.float64) + + def testRandomComplex64(self): + self._testRandom(np.complex64) + + def testRandomComplex128(self): + self._testRandom(np.complex128) + + def testRandomUint8(self): + self._testRandom(np.uint8) + + def testRandomInt8(self): + self._testRandom(np.int8) + + def testRandomInt16(self): + self._testRandom(np.int16) + def testThreeArgument(self): x = np.array([[-2, 3, -1], [1, -3, -3]]) np_val = np.where(x > 0, x * x, -x) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 5065217f33..3e0cfba90d 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -2436,7 +2436,9 @@ def where(condition, x=None, y=None, name=None): ValueError: When exactly one of `x` or `y` is non-None. """ if x is None and y is None: - return gen_array_ops.where(input=condition, name=name) + with ops.name_scope(name, "Where", [condition]) as name: + condition = ops.convert_to_tensor(condition, dtype=dtypes.bool) + return gen_array_ops.where(input=condition, name=name) elif x is not None and y is not None: return gen_math_ops._select(condition=condition, t=x, e=y, name=name) else: -- GitLab From 697262d4ff781fdfb8f70226514d127adad74112 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 10:48:41 -0700 Subject: [PATCH 226/909] Update ops-related pbtxt files. 
PiperOrigin-RevId: 171700908 --- .../core/ops/compat/ops_history.v1.pbtxt | 39 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 34 ++++++++++++++-- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 1eafbe138c..2097c587d5 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -40539,6 +40539,45 @@ op { type: DT_INT64 } } +op { + name: "Where" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "index" + type: DT_INT64 + } + attr { + name: "T" + type: "type" + default_value { + type: DT_BOOL + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + type: DT_BOOL + } + } + } +} op { name: "WholeFileReader" output_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 53d99178e5..fc22594ea4 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -32447,14 +32447,42 @@ op { name: "Where" input_arg { name: "input" - type: DT_BOOL + type_attr: "T" } output_arg { name: "index" type: DT_INT64 } - summary: "Returns locations of true values in a boolean tensor." - description: "This operation returns the coordinates of true elements in `input`. The\ncoordinates are returned in a 2-D tensor where the first dimension (rows)\nrepresents the number of true elements, and the second dimension (columns)\nrepresents the coordinates of the true elements. Keep in mind, the shape of\nthe output tensor can vary depending on how many true values there are in\n`input`. 
Indices are output in row-major order.\n\nFor example:\n\n```\n# \'input\' tensor is [[True, False]\n# [True, False]]\n# \'input\' has two true values, so output has two coordinates.\n# \'input\' has rank of 2, so coordinates have two indices.\nwhere(input) ==> [[0, 0],\n [1, 0]]\n\n# `input` tensor is [[[True, False]\n# [True, False]]\n# [[False, True]\n# [False, True]]\n# [[False, False]\n# [False, True]]]\n# \'input\' has 5 true values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n [0, 1, 0],\n [1, 0, 1],\n [1, 1, 1],\n [2, 1, 1]]\n```" + attr { + name: "T" + type: "type" + default_value { + type: DT_BOOL + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + type: DT_BOOL + } + } + } + summary: "Returns locations of nonzero / true values in a tensor." + description: "This operation returns the coordinates of true elements in `input`. The\ncoordinates are returned in a 2-D tensor where the first dimension (rows)\nrepresents the number of true elements, and the second dimension (columns)\nrepresents the coordinates of the true elements. Keep in mind, the shape of\nthe output tensor can vary depending on how many true values there are in\n`input`. 
Indices are output in row-major order.\n\nFor example:\n\n```\n# \'input\' tensor is [[True, False]\n# [True, False]]\n# \'input\' has two true values, so output has two coordinates.\n# \'input\' has rank of 2, so coordinates have two indices.\nwhere(input) ==> [[0, 0],\n [1, 0]]\n\n# `input` tensor is [[[True, False]\n# [True, False]]\n# [[False, True]\n# [False, True]]\n# [[False, False]\n# [False, True]]]\n# \'input\' has 5 true values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n [0, 1, 0],\n [1, 0, 1],\n [1, 1, 1],\n [2, 1, 1]]\n\n# `input` tensor is [[[1.5, 0.0]\n# [-0.5, 0.0]]\n# [[0.0, 0.25]\n# [0.0, 0.75]]\n# [[0.0, 0.0]\n# [0.0, 0.01]]]\n# \'input\' has 5 nonzero values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n [0, 1, 0],\n [1, 0, 1],\n [1, 1, 1],\n [2, 1, 1]]\n\n# `input` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j]\n# [0.0 + 0.5j, 0.0 + 0.0j]]\n# [[0.0 + 0.0j, 0.25 + 1.5j]\n# [0.0 + 0.0j, 0.75 + 0.0j]]\n# [[0.0 + 0.0j, 0.0 + 0.0j]\n# [0.0 + 0.0j, 0.01 + 0.0j]]]\n# \'input\' has 5 nonzero magnitude values, so output has 5 coordinates.\n# \'input\' has rank of 3, so coordinates have three indices.\nwhere(input) ==> [[0, 0, 0],\n [0, 1, 0],\n [1, 0, 1],\n [1, 1, 1],\n [2, 1, 1]]\n```" } op { name: "WholeFileReader" -- GitLab From 1fe440b368a19d0cf003bb7e4056a93937c57ada Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 10:55:14 -0700 Subject: [PATCH 227/909] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 171701981 --- tensorflow/go/op/wrappers.go | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 804275dda6..9417de3932 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -1262,7 +1262,7 @@ func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { return op.Output(0) } -// Returns locations of true values in a boolean tensor. +// Returns locations of nonzero / true values in a tensor. // // This operation returns the coordinates of true elements in `input`. The // coordinates are returned in a 2-D tensor where the first dimension (rows) @@ -1294,6 +1294,34 @@ func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { // [1, 0, 1], // [1, 1, 1], // [2, 1, 1]] +// +// # `input` tensor is [[[1.5, 0.0] +// # [-0.5, 0.0]] +// # [[0.0, 0.25] +// # [0.0, 0.75]] +// # [[0.0, 0.0] +// # [0.0, 0.01]]] +// # 'input' has 5 nonzero values, so output has 5 coordinates. +// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] +// +// # `input` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] +// # [0.0 + 0.5j, 0.0 + 0.0j]] +// # [[0.0 + 0.0j, 0.25 + 1.5j] +// # [0.0 + 0.0j, 0.75 + 0.0j]] +// # [[0.0 + 0.0j, 0.0 + 0.0j] +// # [0.0 + 0.0j, 0.01 + 0.0j]]] +// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates. +// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] // ``` func Where(scope *Scope, input tf.Output) (index tf.Output) { if scope.Err() != nil { -- GitLab From 46f0650df68214a3544ec00c1473a7ab14a0f99f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 11:00:30 -0700 Subject: [PATCH 228/909] `name_scope('')` -> `name_scope(None)`. 
PiperOrigin-RevId: 171702882 --- .../contrib/gan/python/estimator/python/gan_estimator_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py index 6e1ee730aa..e89993991a 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py @@ -238,7 +238,7 @@ def _make_train_gan_model(generator_fn, discriminator_fn, real_data, if add_summaries: if not isinstance(add_summaries, (tuple, list)): add_summaries = [add_summaries] - with ops.name_scope(''): + with ops.name_scope(None): for summary_type in add_summaries: _summary_type_map[summary_type](gan_model) -- GitLab From 90121d582dbad4bd13dd2a9750c3a908e89469dd Mon Sep 17 00:00:00 2001 From: Dan Ringwalt Date: Tue, 10 Oct 2017 14:11:03 -0400 Subject: [PATCH 229/909] Add a tf.contrib.image.translate function (#12306) * Add a tf.contrib.image.translate function * Remove redundant checks from tf.contrib.image.translate. * Add translate and translations_to_projective_transforms to the docstring. * Fix lint errors for tf.contrib.image.translate * Add name_scopes in image_ops. Indicate in the docstrings when the static shape of the arguments must have a known rank. * Fix pyformat's weird docstring indentation. * tf.name_scope -> ops.name_scope * Move the test session inside the _DTYPES loop. * Use the default_name arg of name_scope. * Check for ndims == None * Fix translate docstring and add a comment. * s/vector/matrix/ for the multiple translations. 
--- tensorflow/contrib/image/__init__.py | 4 + .../python/kernel_tests/image_ops_test.py | 33 +- .../contrib/image/python/ops/image_ops.py | 294 ++++++++++++------ 3 files changed, 224 insertions(+), 107 deletions(-) diff --git a/tensorflow/contrib/image/__init__.py b/tensorflow/contrib/image/__init__.py index 59a322d3ca..d030dffade 100755 --- a/tensorflow/contrib/image/__init__.py +++ b/tensorflow/contrib/image/__init__.py @@ -26,6 +26,8 @@ projective transforms (including rotation) are supported. @@random_yiq_hsv @@rotate @@transform +@@translate +@@translations_to_projective_transforms @@bipartite_match @@single_image_random_dot_stereograms """ @@ -41,6 +43,8 @@ from tensorflow.contrib.image.python.ops.image_ops import angles_to_projective_t from tensorflow.contrib.image.python.ops.image_ops import compose_transforms from tensorflow.contrib.image.python.ops.image_ops import rotate from tensorflow.contrib.image.python.ops.image_ops import transform +from tensorflow.contrib.image.python.ops.image_ops import translate +from tensorflow.contrib.image.python.ops.image_ops import translations_to_projective_transforms from tensorflow.contrib.image.python.ops.single_image_random_dot_stereograms import single_image_random_dot_stereograms from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index b8a0706b61..b50177ae56 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -36,8 +36,8 @@ _DTYPES = set( class ImageOpsTest(test_util.TensorFlowTestCase): def test_zeros(self): - with self.test_session(): - for dtype in _DTYPES: + for dtype in _DTYPES: + with self.test_session(): for shape in [(5, 5), (24, 24), (2, 24, 24, 3)]: for angle in [0, 1, np.pi / 2.0]: image = array_ops.zeros(shape, dtype) @@ -46,8 +46,8 @@ class 
ImageOpsTest(test_util.TensorFlowTestCase): np.zeros(shape, dtype.as_numpy_dtype())) def test_rotate_even(self): - with self.test_session(): - for dtype in _DTYPES: + for dtype in _DTYPES: + with self.test_session(): image = array_ops.reshape( math_ops.cast(math_ops.range(36), dtype), (6, 6)) image_rep = array_ops.tile(image[None, :, :, None], [3, 1, 1, 1]) @@ -68,8 +68,8 @@ class ImageOpsTest(test_util.TensorFlowTestCase): [1, 7, 13, 19, 25, 31], [0, 6, 12, 18, 24, 30]]]) def test_rotate_odd(self): - with self.test_session(): - for dtype in _DTYPES: + for dtype in _DTYPES: + with self.test_session(): image = array_ops.reshape( math_ops.cast(math_ops.range(25), dtype), (5, 5)) image_rep = array_ops.tile(image[None, :, :, None], [3, 1, 1, 1]) @@ -87,9 +87,25 @@ class ImageOpsTest(test_util.TensorFlowTestCase): [22, 17, 12, 7, 2], [23, 18, 13, 8, 3], [24, 19, 14, 9, 4]]]) + def test_translate(self): + for dtype in _DTYPES: + with self.test_session(): + image = constant_op.constant( + [[1, 0, 1, 0], + [0, 1, 0, 1], + [1, 0, 1, 0], + [0, 1, 0, 1]], dtype=dtype) + translation = constant_op.constant([-1, -1], dtypes.float32) + image_translated = image_ops.translate(image, translation) + self.assertAllEqual(image_translated.eval(), + [[1, 0, 1, 0], + [0, 1, 0, 0], + [1, 0, 1, 0], + [0, 0, 0, 0]]) + def test_compose(self): - with self.test_session(): - for dtype in _DTYPES: + for dtype in _DTYPES: + with self.test_session(): image = constant_op.constant( [[1, 1, 1, 0], [1, 0, 0, 0], @@ -246,4 +262,3 @@ class BipartiteMatchTest(test_util.TensorFlowTestCase): if __name__ == "__main__": googletest.main() - diff --git a/tensorflow/contrib/image/python/ops/image_ops.py b/tensorflow/contrib/image/python/ops/image_ops.py index aef3e385b5..011ddeaa9a 100644 --- a/tensorflow/contrib/image/python/ops/image_ops.py +++ b/tensorflow/contrib/image/python/ops/image_ops.py @@ -37,16 +37,18 @@ _IMAGE_DTYPES = set( 
ops.RegisterShape("ImageProjectiveTransform")(common_shapes.call_cpp_shape_fn) -def rotate(images, angles, interpolation="NEAREST"): +def rotate(images, angles, interpolation="NEAREST", name=None): """Rotate image(s) by the passed angle(s) in radians. Args: images: A tensor of shape (num_images, num_rows, num_columns, num_channels) (NHWC), (num_rows, num_columns, num_channels) (HWC), or - (num_rows, num_columns) (HW). + (num_rows, num_columns) (HW). The rank must be statically known (the + shape is not `TensorShape(None)`. angles: A scalar angle to rotate all images by, or (if images has rank 4) a vector of length num_images, with an angle for each image in the batch. interpolation: Interpolation mode. Supported values: "NEAREST", "BILINEAR". + name: The name of the op. Returns: Image(s) with the same type and shape as `images`, rotated by the given @@ -55,38 +57,77 @@ def rotate(images, angles, interpolation="NEAREST"): Raises: TypeError: If `image` is an invalid type. """ - image_or_images = ops.convert_to_tensor(images, name="images") - if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES: - raise TypeError("Invalid dtype %s." 
% image_or_images.dtype) - if len(image_or_images.get_shape()) == 2: - images = image_or_images[None, :, :, None] - elif len(image_or_images.get_shape()) == 3: - images = image_or_images[None, :, :, :] - elif len(image_or_images.get_shape()) == 4: - images = image_or_images - else: - raise TypeError("Images should have rank between 2 and 4.") - - image_height = math_ops.cast(array_ops.shape(images)[1], dtypes.float32)[None] - image_width = math_ops.cast(array_ops.shape(images)[2], dtypes.float32)[None] - output = transform( - images, - angles_to_projective_transforms(angles, image_height, image_width), - interpolation=interpolation) - if len(image_or_images.get_shape()) == 2: - return output[0, :, :, 0] - elif len(image_or_images.get_shape()) == 3: - return output[0, :, :, :] - else: - return output + with ops.name_scope(name, "rotate"): + image_or_images = ops.convert_to_tensor(images) + if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES: + raise TypeError("Invalid dtype %s." % image_or_images.dtype) + elif image_or_images.get_shape().ndims is None: + raise TypeError("image_or_images rank must be statically known") + elif len(image_or_images.get_shape()) == 2: + images = image_or_images[None, :, :, None] + elif len(image_or_images.get_shape()) == 3: + images = image_or_images[None, :, :, :] + elif len(image_or_images.get_shape()) == 4: + images = image_or_images + else: + raise TypeError("Images should have rank between 2 and 4.") + + image_height = math_ops.cast(array_ops.shape(images)[1], + dtypes.float32)[None] + image_width = math_ops.cast(array_ops.shape(images)[2], + dtypes.float32)[None] + output = transform( + images, + angles_to_projective_transforms(angles, image_height, image_width), + interpolation=interpolation) + if image_or_images.get_shape().ndims is None: + raise TypeError("image_or_images rank must be statically known") + elif len(image_or_images.get_shape()) == 2: + return output[0, :, :, 0] + elif len(image_or_images.get_shape()) == 3: + 
return output[0, :, :, :] + else: + return output + + +def translate(images, translations, interpolation="NEAREST", name=None): + """Translate image(s) by the passed vectors(s). + Args: + images: A tensor of shape (num_images, num_rows, num_columns, num_channels) + (NHWC), (num_rows, num_columns, num_channels) (HWC), or + (num_rows, num_columns) (HW). The rank must be statically known (the + shape is not `TensorShape(None)`. + translations: A vector representing [dx, dy] or (if images has rank 4) + a matrix of length num_images, with a [dx, dy] vector for each image in + the batch. + interpolation: Interpolation mode. Supported values: "NEAREST", "BILINEAR". + name: The name of the op. -def angles_to_projective_transforms(angles, image_height, image_width): + Returns: + Image(s) with the same type and shape as `images`, translated by the given + vector(s). Empty space due to the translation will be filled with zeros. + + Raises: + TypeError: If `image` is an invalid type. + """ + with ops.name_scope(name, "translate"): + return transform( + images, + translations_to_projective_transforms(translations), + interpolation=interpolation) + + +def angles_to_projective_transforms(angles, + image_height, + image_width, + name=None): """Returns projective transform(s) for the given angle(s). Args: angles: A scalar angle to rotate all images by, or (for batches of images) - a vector with an angle to rotate each image in the batch. + a vector with an angle to rotate each image in the batch. The rank must + be statically known (the shape is not `TensorShape(None)`. image_height: Height of the image(s) to be transformed. image_width: Width of the image(s) to be transformed. @@ -94,41 +135,89 @@ def angles_to_projective_transforms(angles, image_height, image_width): A tensor of shape (num_images, 8). Projective transforms which can be given to `tf.contrib.image.transform`. 
""" - angle_or_angles = ops.convert_to_tensor( - angles, name="angles", dtype=dtypes.float32) - if len(angle_or_angles.get_shape()) == 0: # pylint: disable=g-explicit-length-test - angles = angle_or_angles[None] - elif len(angle_or_angles.get_shape()) == 1: - angles = angle_or_angles - else: - raise TypeError("Angles should have rank 0 or 1.") - x_offset = ((image_width - 1) - (math_ops.cos(angles) * - (image_width - 1) - math_ops.sin(angles) * - (image_height - 1))) / 2.0 - y_offset = ((image_height - 1) - (math_ops.sin(angles) * - (image_width - 1) + math_ops.cos(angles) * - (image_height - 1))) / 2.0 - num_angles = array_ops.shape(angles)[0] - return array_ops.concat( - values=[ - math_ops.cos(angles)[:, None], - -math_ops.sin(angles)[:, None], - x_offset[:, None], - math_ops.sin(angles)[:, None], - math_ops.cos(angles)[:, None], - y_offset[:, None], - array_ops.zeros((num_angles, 2), dtypes.float32), - ], - axis=1) - - -def transform(images, transforms, interpolation="NEAREST"): + with ops.name_scope(name, "angles_to_projective_transforms"): + angle_or_angles = ops.convert_to_tensor( + angles, name="angles", dtype=dtypes.float32) + if len(angle_or_angles.get_shape()) == 0: # pylint: disable=g-explicit-length-test + angles = angle_or_angles[None] + elif len(angle_or_angles.get_shape()) == 1: + angles = angle_or_angles + else: + raise TypeError("Angles should have rank 0 or 1.") + x_offset = ((image_width - 1) - (math_ops.cos(angles) * + (image_width - 1) - math_ops.sin(angles) * + (image_height - 1))) / 2.0 + y_offset = ((image_height - 1) - (math_ops.sin(angles) * + (image_width - 1) + math_ops.cos(angles) * + (image_height - 1))) / 2.0 + num_angles = array_ops.shape(angles)[0] + return array_ops.concat( + values=[ + math_ops.cos(angles)[:, None], + -math_ops.sin(angles)[:, None], + x_offset[:, None], + math_ops.sin(angles)[:, None], + math_ops.cos(angles)[:, None], + y_offset[:, None], + array_ops.zeros((num_angles, 2), dtypes.float32), + ], + axis=1) + + +def 
translations_to_projective_transforms(translations, name=None): + """Returns projective transform(s) for the given translation(s). + + Args: + translations: A 2-element list representing [dx, dy] or a matrix of + 2-element lists representing [dx, dy] to translate for each image + (for a batch of images). The rank must be statically known (the shape + is not `TensorShape(None)`. + name: The name of the op. + + Returns: + A tensor of shape (num_images, 8) projective transforms which can be given + to `tf.contrib.image.transform`. + """ + with ops.name_scope(name, "translations_to_projective_transforms"): + translation_or_translations = ops.convert_to_tensor( + translations, name="translations", dtype=dtypes.float32) + if translation_or_translations.get_shape().ndims is None: + raise TypeError( + "translation_or_translations rank must be statically known") + elif len(translation_or_translations.get_shape()) == 1: + translations = translation_or_translations[None] + elif len(translation_or_translations.get_shape()) == 2: + translations = translation_or_translations + else: + raise TypeError("Translations should have rank 1 or 2.") + num_translations = array_ops.shape(translations)[0] + # The translation matrix looks like: + # [[1 0 -dx] + # [0 1 -dy] + # [0 0 1]] + # where the last entry is implicit. + # Translation matrices are always float32. + return array_ops.concat( + values=[ + array_ops.ones((num_translations, 1), dtypes.float32), + array_ops.zeros((num_translations, 1), dtypes.float32), + -translations[:, 0, None], + array_ops.zeros((num_translations, 1), dtypes.float32), + array_ops.ones((num_translations, 1), dtypes.float32), + -translations[:, 1, None], + array_ops.zeros((num_translations, 2), dtypes.float32), + ], + axis=1) + + +def transform(images, transforms, interpolation="NEAREST", name=None): """Applies the given transform(s) to the image(s). 
Args: images: A tensor of shape (num_images, num_rows, num_columns, num_channels) (NHWC), (num_rows, num_columns, num_channels) (HWC), or - (num_rows, num_columns) (HW). + (num_rows, num_columns) (HW). The rank must be statically known (the + shape is not `TensorShape(None)`. transforms: Projective transform matrix/matrices. A vector of length 8 or tensor of size N x 8. If one row of transforms is [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point @@ -146,34 +235,40 @@ def transform(images, transforms, interpolation="NEAREST"): Raises: TypeError: If `image` is an invalid type. """ - image_or_images = ops.convert_to_tensor(images, name="images") - transform_or_transforms = ops.convert_to_tensor( - transforms, name="transforms", dtype=dtypes.float32) - if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES: - raise TypeError("Invalid dtype %s." % image_or_images.dtype) - if len(image_or_images.get_shape()) == 2: - images = image_or_images[None, :, :, None] - elif len(image_or_images.get_shape()) == 3: - images = image_or_images[None, :, :, :] - elif len(image_or_images.get_shape()) == 4: - images = image_or_images - else: - raise TypeError("Images should have rank between 2 and 4.") - - if len(transform_or_transforms.get_shape()) == 1: - transforms = transform_or_transforms[None] - elif len(transform_or_transforms.get_shape()) == 2: - transforms = transform_or_transforms - else: - raise TypeError("Transforms should have rank 1 or 2.") - output = gen_image_ops.image_projective_transform( - images, transforms, interpolation=interpolation.upper()) - if len(image_or_images.get_shape()) == 2: - return output[0, :, :, 0] - elif len(image_or_images.get_shape()) == 3: - return output[0, :, :, :] - else: - return output + with ops.name_scope(name, "transform"): + image_or_images = ops.convert_to_tensor(images, name="images") + transform_or_transforms = ops.convert_to_tensor( + transforms, name="transforms", dtype=dtypes.float32) + if 
image_or_images.dtype.base_dtype not in _IMAGE_DTYPES: + raise TypeError("Invalid dtype %s." % image_or_images.dtype) + elif image_or_images.get_shape().ndims is None: + raise TypeError("image_or_images rank must be statically known") + elif len(image_or_images.get_shape()) == 2: + images = image_or_images[None, :, :, None] + elif len(image_or_images.get_shape()) == 3: + images = image_or_images[None, :, :, :] + elif len(image_or_images.get_shape()) == 4: + images = image_or_images + else: + raise TypeError("Images should have rank between 2 and 4.") + + if len(transform_or_transforms.get_shape()) == 1: + transforms = transform_or_transforms[None] + elif transform_or_transforms.get_shape().ndims is None: + raise TypeError( + "transform_or_transforms rank must be statically known") + elif len(transform_or_transforms.get_shape()) == 2: + transforms = transform_or_transforms + else: + raise TypeError("Transforms should have rank 1 or 2.") + output = gen_image_ops.image_projective_transform( + images, transforms, interpolation=interpolation.upper()) + if len(image_or_images.get_shape()) == 2: + return output[0, :, :, 0] + elif len(image_or_images.get_shape()) == 3: + return output[0, :, :, :] + else: + return output def compose_transforms(*transforms): @@ -191,11 +286,12 @@ def compose_transforms(*transforms): order. """ assert transforms, "transforms cannot be empty" - composed = _flat_transforms_to_matrices(transforms[0]) - for tr in transforms[1:]: - # Multiply batches of matrices. - composed = math_ops.matmul(composed, _flat_transforms_to_matrices(tr)) - return _transform_matrices_to_flat(composed) + with ops.name_scope("compose_transforms"): + composed = _flat_transforms_to_matrices(transforms[0]) + for tr in transforms[1:]: + # Multiply batches of matrices. 
+ composed = math_ops.matmul(composed, _flat_transforms_to_matrices(tr)) + return _transform_matrices_to_flat(composed) def _flat_transforms_to_matrices(transforms): @@ -211,8 +307,8 @@ def _flat_transforms_to_matrices(transforms): def _transform_matrices_to_flat(transform_matrices): # Flatten each matrix. - transforms = array_ops.reshape( - transform_matrices, constant_op.constant([-1, 9])) + transforms = array_ops.reshape(transform_matrices, + constant_op.constant([-1, 9])) # Divide each matrix by the last entry (normally 1). transforms /= transforms[:, 8:9] return transforms[:, :8] @@ -260,10 +356,10 @@ def _image_projective_transform_grad(op, grad): return [output, None] -def bipartite_match( - distance_mat, - num_valid_rows, - top_k=-1): +def bipartite_match(distance_mat, + num_valid_rows, + top_k=-1, + name="bipartite_match"): """Find bipartite matching based on a given distance matrix. A greedy bi-partite matching algorithm is used to obtain the matching with @@ -282,6 +378,7 @@ def bipartite_match( top_k: A scalar that specifies the number of top-k matches to retrieve. If set to be negative, then is set according to the maximum number of matches from `distance_mat`. + name: The name of the op. Returns: row_to_col_match_indices: A vector of length num_rows, which is the number @@ -292,7 +389,8 @@ def bipartite_match( If `col_to_row_match_indices[j]` is not -1, column j is matched to row `col_to_row_match_indices[j]`. """ - result = gen_image_ops.bipartite_match(distance_mat, num_valid_rows, top_k) + result = gen_image_ops.bipartite_match( + distance_mat, num_valid_rows, top_k, name=name) return result -- GitLab From cbd2974ed583ed725c33c22000a1a357cc30e46b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 11:16:59 -0700 Subject: [PATCH 230/909] Adding comment to documentation of tf.image.crop_and_resize about it being corner aligned. 
PiperOrigin-RevId: 171706213 --- tensorflow/core/ops/image_ops.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc index 1453943d78..a44bac60bf 100644 --- a/tensorflow/core/ops/image_ops.cc +++ b/tensorflow/core/ops/image_ops.cc @@ -1101,7 +1101,10 @@ slice from the input image and does not allow resizing or aspect ratio change. Returns a tensor with `crops` from the input `image` at positions defined at the bounding box locations in `boxes`. The cropped boxes are all resized (with bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The -result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. +result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The +resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the +method will give identical results to using `tf.image.resize_bilinear()` +with `align_corners=True`. image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. Both `image_height` and `image_width` need to be positive. -- GitLab From 2446c53c8c9510f881f6193c91be21b8e8a9a488 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 10 Oct 2017 11:31:20 -0700 Subject: [PATCH 231/909] Update the base image for TF CPU remote build image * `clang-debian8` from Cloud Launcher will be used directly , without building from source https://console.cloud.google.com/launcher/details/google/clang-debian8?filter=category:developer-tools&q=clang PiperOrigin-RevId: 171708832 --- .../tools/ci_build/remote/Dockerfile.cpu | 2 +- .../ci_build/remote/remote_docker_build.sh | 32 +++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tensorflow/tools/ci_build/remote/Dockerfile.cpu b/tensorflow/tools/ci_build/remote/Dockerfile.cpu index 04365f12d6..7b01d8320d 100644 --- a/tensorflow/tools/ci_build/remote/Dockerfile.cpu +++ b/tensorflow/tools/ci_build/remote/Dockerfile.cpu @@ -1,4 +1,4 @@ -FROM debian8-clang:latest +FROM launcher.gcr.io/google/clang-debian8:latest RUN apt-get update && apt-get --no-install-recommends install -y \ binutils \ diff --git a/tensorflow/tools/ci_build/remote/remote_docker_build.sh b/tensorflow/tools/ci_build/remote/remote_docker_build.sh index 0ac1165dcd..3ac6840f4e 100755 --- a/tensorflow/tools/ci_build/remote/remote_docker_build.sh +++ b/tensorflow/tools/ci_build/remote/remote_docker_build.sh @@ -88,25 +88,25 @@ function print_usage { } +# Build nvidia-cuba-clang base image for GPU image. 
+# For CPU the `clang-debian8` from Cloud Launcher will be used directly: +# https://console.cloud.google.com/launcher/details/google/clang-debian8?filter=category:developer-tools&q=clang function build_base_image { - if [ "$cpu_build" = true ] ; then - base_image="debian8" - else + if [ "$gpu_build" = true ] ; then base_image="nvidia-cuda" + # Run a 2-stage build for clang base image, see + # https://github.com/llvm-mirror/llvm/blob/master/docs/Docker.rst + $base_image_build_script \ + --source $base_image \ + --branch branches/google/stable \ + --docker-repository ${base_image}-clang --docker-tag "latest" \ + -p clang -i stage2-install-clang -i stage2-install-clang-headers \ + -- \ + -DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \ + -DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \ + -DCLANG_ENABLE_BOOTSTRAP=ON \ + -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-headers" fi - - # Run a 2-stage build for clang base image, see - # https://github.com/llvm-mirror/llvm/blob/master/docs/Docker.rst - $base_image_build_script \ - --source $base_image \ - --branch branches/google/stable \ - --docker-repository ${base_image}-clang --docker-tag "latest" \ - -p clang -i stage2-install-clang -i stage2-install-clang-headers \ - -- \ - -DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \ - -DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \ - -DCLANG_ENABLE_BOOTSTRAP=ON \ - -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-headers" } -- GitLab From afdfb5ac9807223cf3c21515a794ae7216f59700 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 11:35:11 -0700 Subject: [PATCH 232/909] Update ops-related pbtxt files. 
PiperOrigin-RevId: 171709536 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index fc22594ea4..fcb5792e5c 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -5631,7 +5631,7 @@ op { description: "Value used for extrapolation, when applicable." } summary: "Extracts crops from the input image tensor and bilinearly resizes them (possibly" - description: "with aspect ratio change) to a common output size specified by `crop_size`. This\nis more general than the `crop_to_bounding_box` op which extracts a fixed size\nslice from the input image and does not allow resizing or aspect ratio change.\n\nReturns a tensor with `crops` from the input `image` at positions defined at the\nbounding box locations in `boxes`. The cropped boxes are all resized (with\nbilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The\nresult is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`." + description: "with aspect ratio change) to a common output size specified by `crop_size`. This\nis more general than the `crop_to_bounding_box` op which extracts a fixed size\nslice from the input image and does not allow resizing or aspect ratio change.\n\nReturns a tensor with `crops` from the input `image` at positions defined at the\nbounding box locations in `boxes`. The cropped boxes are all resized (with\nbilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The\nresult is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The\nresizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the\nmethod will give identical results to using `tf.image.resize_bilinear()`\nwith `align_corners=True`." } op { name: "CropAndResizeGradBoxes" -- GitLab From 651b7d587bc366bf93b551b3df2b44cf9fb53c71 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 10 Oct 2017 11:43:14 -0700 Subject: [PATCH 233/909] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 171710900 --- tensorflow/go/op/wrappers.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 9417de3932..96a1c2695a 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -8523,7 +8523,10 @@ func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr { // Returns a tensor with `crops` from the input `image` at positions defined at the // bounding box locations in `boxes`. The cropped boxes are all resized (with // bilinear interpolation) to a fixed `size = [crop_height, crop_width]`. The -// result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. +// result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`. The +// resizing is corner aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the +// method will give identical results to using `tf.image.resize_bilinear()` +// with `align_corners=True`. // // Arguments: // image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -- GitLab From 253f5386cb6478dba6d9b99286775c6cbbe86a9a Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Tue, 10 Oct 2017 12:09:42 -0700 Subject: [PATCH 234/909] eager: Fix an issue with tf.identity. Like with graph execution, tf.identity should accept an input that is not a Tensor instance but can be converted to one. 
PiperOrigin-RevId: 171714919 --- tensorflow/python/eager/ops_test.py | 3 +++ tensorflow/python/ops/array_ops.py | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index 6d17c7eeff..7d54b8d2d8 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -299,6 +299,9 @@ class OpsTest(test_util.TensorFlowTestCase): y = flatten_layer(x) self.assertAllEqual([[-10, -20, -30, -40], [10, 20, 30, 40]], y.numpy()) + def testIdentity(self): + self.assertEqual(2, array_ops.identity(2).numpy()) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 3e0cfba90d..61405e3f45 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -124,7 +124,13 @@ def identity(input, name=None): # pylint: disable=redefined-builtin if context.in_graph_mode(): return gen_array_ops.identity(input, name=name) else: - if context.context().device_name != input.device: + try: + in_device = input.device + except AttributeError: + input = ops.convert_to_tensor(input) + in_device = input.device + # TODO(ashankar): Does 'identity' need to invoke execution callbacks? + if context.context().device_name != in_device: return input._copy() # pylint: disable=protected-access return input -- GitLab From 9954458183ebd8d0ab5f7d06f063c8372dbcf6fb Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Tue, 10 Oct 2017 12:14:35 -0700 Subject: [PATCH 235/909] Define truncatemod in terms of tf.truncatediv to be explicit. 
PiperOrigin-RevId: 171715629 --- tensorflow/core/ops/math_ops.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 015fd6e388..ab0bc258f7 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -692,8 +692,8 @@ REGISTER_OP("Mod") .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( Returns element-wise remainder of division. This emulates C semantics in that -the result here is consistent with a truncating divide. E.g. `truncate(x / y) * -y + truncate_mod(x, y) = x`. +the result here is consistent with a truncating divide. E.g. +`tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`. *NOTE*: `Mod` supports broadcasting. More about broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -- GitLab From 721fbda83fc0cb00c9bf9ed461c8fc3084f42fe1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 12:20:59 -0700 Subject: [PATCH 236/909] [TF:XLA] Rename BINOP_LOGICAL_X to BINOP_X PiperOrigin-RevId: 171716540 --- .../compiler/xla/client/computation_builder.cc | 6 +++--- tensorflow/compiler/xla/service/shape_inference.cc | 12 ++++++------ tensorflow/compiler/xla/service/user_computation.cc | 6 +++--- tensorflow/compiler/xla/xla_data.proto | 6 +++--- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 4757e8b0d2..cbd71dad86 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -958,18 +958,18 @@ ComputationDataHandle ComputationBuilder::Min( ComputationDataHandle ComputationBuilder::And( const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return BinaryOp(BINOP_LOGICAL_AND, lhs, rhs, broadcast_dimensions); + return 
BinaryOp(BINOP_AND, lhs, rhs, broadcast_dimensions); } ComputationDataHandle ComputationBuilder::Or( const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions) { - return BinaryOp(BINOP_LOGICAL_OR, lhs, rhs, broadcast_dimensions); + return BinaryOp(BINOP_OR, lhs, rhs, broadcast_dimensions); } ComputationDataHandle ComputationBuilder::Not( const ComputationDataHandle& operand) { - return UnaryOp(UNOP_LOGICAL_NOT, operand); + return UnaryOp(UNOP_NOT, operand); } ComputationDataHandle ComputationBuilder::Abs( diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 06a68c81e4..b333d232a7 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -58,7 +58,7 @@ UnaryOperation OpcodeToUnaryOperation(HloOpcode opcode) { case HloOpcode::kLog: return UNOP_LOG; case HloOpcode::kNot: - return UNOP_LOGICAL_NOT; + return UNOP_NOT; case HloOpcode::kNegate: return UNOP_NEGATE; case HloOpcode::kRoundNearestAfz: @@ -114,9 +114,9 @@ BinaryOperation OpcodeToBinaryOperation(HloOpcode opcode) { case HloOpcode::kRemainder: return BINOP_REM; case HloOpcode::kOr: - return BINOP_LOGICAL_OR; + return BINOP_OR; case HloOpcode::kAnd: - return BINOP_LOGICAL_AND; + return BINOP_AND; default: LOG(FATAL) << "unhandled opcode " << opcode; } @@ -322,7 +322,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, case UNOP_SORT: return arg; - case UNOP_LOGICAL_NOT: + case UNOP_NOT: if (arg.element_type() != PRED) { return InvalidArgument( "expected pred element type in argument to logical-not operation; " @@ -750,8 +750,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InferElementwiseBinaryOpShape(operation, lhs, rhs, broadcast_dimensions); - case BINOP_LOGICAL_AND: - case BINOP_LOGICAL_OR: + case BINOP_AND: + case BINOP_OR: if (lhs.element_type() != PRED) { return InvalidArgument( 
"expected pred element type in argument to logical and/or " diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index 05f5476b88..317817d022 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -58,7 +58,7 @@ HloOpcode UnaryOperationToHloOpcode(UnaryOperation unop) { return HloOpcode::kIsFinite; case UNOP_LOG: return HloOpcode::kLog; - case UNOP_LOGICAL_NOT: + case UNOP_NOT: return HloOpcode::kNot; case UNOP_NEGATE: return HloOpcode::kNegate; @@ -111,9 +111,9 @@ HloOpcode BinaryOperationToHloOpcode(BinaryOperation binop) { return HloOpcode::kPower; case BINOP_REM: return HloOpcode::kRemainder; - case BINOP_LOGICAL_OR: + case BINOP_OR: return HloOpcode::kOr; - case BINOP_LOGICAL_AND: + case BINOP_AND: return HloOpcode::kAnd; default: LOG(FATAL) << "unhandled operation " << binop; diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 1771a3d5de..3f26b88809 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -618,7 +618,7 @@ enum UnaryOperation { UNOP_INVALID = 0; // Elementwise, logical negation - UNOP_LOGICAL_NOT = 1; + UNOP_NOT = 1; // Elementwise, computes e^x. UNOP_EXP = 2; @@ -707,8 +707,8 @@ enum BinaryOperation { BINOP_REM = 17; // Logical operators - BINOP_LOGICAL_AND = 18; - BINOP_LOGICAL_OR = 19; + BINOP_AND = 18; + BINOP_OR = 19; } message BinaryOpRequest { -- GitLab From 803707b01fdc3048347f6e1b3aca751cf699b1e8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 12:21:24 -0700 Subject: [PATCH 237/909] Update ops-related pbtxt files. 
PiperOrigin-RevId: 171716595 --- tensorflow/core/ops/ops.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index fcb5792e5c..7579aef259 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -14434,7 +14434,7 @@ op { } } summary: "Returns element-wise remainder of division. This emulates C semantics in that" - description: "the result here is consistent with a truncating divide. E.g. `truncate(x / y) *\ny + truncate_mod(x, y) = x`.\n\n*NOTE*: `Mod` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" + description: "the result here is consistent with a truncating divide. E.g.\n`tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`.\n\n*NOTE*: `Mod` supports broadcasting. More about broadcasting\n[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)" } op { name: "Mul" -- GitLab From 35c4177d9e2349e4b5c6875e85220fc3f8ddc17c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 12:22:58 -0700 Subject: [PATCH 238/909] Allow tensorflow devices to report their load. This may be used to improve batch scheduling. 
PiperOrigin-RevId: 171716813 --- tensorflow/stream_executor/stream_executor_internal.h | 2 ++ tensorflow/stream_executor/stream_executor_pimpl.cc | 4 ++++ tensorflow/stream_executor/stream_executor_pimpl.h | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h index 802ef755eb..12593e31d4 100644 --- a/tensorflow/stream_executor/stream_executor_internal.h +++ b/tensorflow/stream_executor/stream_executor_internal.h @@ -225,6 +225,8 @@ class StreamExecutorInterface { virtual port::Status SetDeviceSharedMemoryConfig( SharedMemoryConfig config) = 0; + virtual int64 GetDeviceLoad() { return -1; } + virtual bool DeviceMemoryUsage(int64 *free, int64 *total) const { return false; } diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc index 9bbfe7f04a..9dc1749327 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.cc +++ b/tensorflow/stream_executor/stream_executor_pimpl.cc @@ -268,6 +268,10 @@ const DeviceDescription &StreamExecutor::GetDeviceDescription() const { return *device_description_; } +int64 StreamExecutor::GetDeviceLoad() const { + return implementation_->GetDeviceLoad(); +} + int StreamExecutor::PlatformDeviceCount() const { return implementation_->PlatformDeviceCount(); } diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h index f354317a6e..9c225e5fae 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.h +++ b/tensorflow/stream_executor/stream_executor_pimpl.h @@ -312,6 +312,10 @@ class StreamExecutor { // The value is cached on first use. const DeviceDescription &GetDeviceDescription() const; + // If implemented, returns device specific measurement of load + // (e.g. pending requests). + int64 GetDeviceLoad() const; + // Returns the underlying device memory usage information, if it is available. 
// If it is not available (false is returned), free/total may not be // initialized. -- GitLab From 97fa3e4b87e20ecf6c68225812056345aca5f4cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 12:28:53 -0700 Subject: [PATCH 239/909] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 171717474 --- tensorflow/go/op/wrappers.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 96a1c2695a..cf842f3808 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -8203,8 +8203,8 @@ func OnesLike(scope *Scope, x tf.Output) (y tf.Output) { // Returns element-wise remainder of division. This emulates C semantics in that // -// the result here is consistent with a truncating divide. E.g. `truncate(x / y) * -// y + truncate_mod(x, y) = x`. +// the result here is consistent with a truncating divide. E.g. +// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`. // // *NOTE*: `Mod` supports broadcasting. More about broadcasting // [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -- GitLab From 70e2cbfeb6dc9ba9c01a93405cd64fab90ef0b2e Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Tue, 10 Oct 2017 12:29:36 -0700 Subject: [PATCH 240/909] Add an env-var to choose between FP16 and FP32 as the internal compute type for conv when input data is FP16. The env-var is set to use FP32 by default. 
PiperOrigin-RevId: 171717550 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 147 +++++++++++--------- 1 file changed, 83 insertions(+), 64 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 46516cc445..039f7ea029 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -2079,6 +2079,85 @@ dnn::AlgorithmDesc GetCudnnConvolutionForwardAlgorithm( return dnn::AlgorithmDesc(algo, use_tensor_ops); } +// A helper class to set env-vars and choose options for cudnn-related +// algorithms. +template +class CudnnEnvVar { + public: + static bool IsEnabled() { + static bool is_enabled = IsEnabledImpl(); + return is_enabled; + } + + private: + static bool IsEnabledImpl() { + const char* tf_env_var_val = getenv(EnvVar::kName); + if (tf_env_var_val != nullptr) { + port::StringPiece tf_env_var_val_str(tf_env_var_val); + if (tf_env_var_val_str == "0") { + return false; + } + return true; + } + return EnvVar::kDefaultFlag; + } +}; + +// A helper struct to decide whether to enable the FFT_TILING algorithms for +// forward convolution. Before cudnn v5.1 it works fine but since cudnn v5.1 +// it is turned off due to memory corruption caused by some shapes with this +// algorithm. +// Before NVIDIA fixes the memory corruption bug, users can explicitly +// enable the algorithm through an env-var "TF_ENABLE_FFT_TILING_FORWARD=1". +struct FftTilingForward { + static constexpr const char* kName = "TF_ENABLE_FFT_TILING_FORWARD"; + // TODO(yangzihao): turn the default to True when the memory corruption bug + // is fixed. + static constexpr bool kDefaultFlag = CUDNN_VERSION < 5100; +}; + +// A helper struct to decide whether to enable the WINOGRAD_NONFUSED algorithms. +// By default it is turned on, users can explicitly disable them through an +// env-var "TF_ENABLE_WINOGRAD_NONFUSED=0". 
+// https://github.com/tensorflow/tensorflow/pull/4901 +struct WinogradNonfused { + static constexpr const char* kName = "TF_ENABLE_WINOGRAD_NONFUSED"; + // NVIDIA has fixed winograd nonfused bug for cudnn v>=7. + // For cudnn v>=5.1, we have a workaround and for any lower version, we + // disable it by default. + static constexpr bool kDefaultFlag = CUDNN_VERSION >= 5100; +}; + +// A helper struct to decide whether to use FP32 as the internal compute type +// for convolution when the input data type is FP16. By default it is turned on, +// users can explicitly disable them (choose to use FP16 as the internal compute +// type) through an env-var "TF_FP16_CONV_USE_FP32_COMPUTE=0". +struct ConvDoFP32ComputationFP16Input { + static constexpr const char* kName = "TF_FP16_CONV_USE_FP32_COMPUTE"; + // Using FP16 as the internal compute type for convolution when the input data + // type is FP16 is only supported on architectures with true fp16 support + // (compute capability 5.3 and 6.0). Setting this to false in an unsupported + // architecture will cause internal errors. + static constexpr bool kDefaultFlag = true; +}; + +// A group of helper functions to return the internal compute type for +// convolutions in cudnn. +// TODO(yangzihao): Add support for float64. +template +cudnnDataType_t GetConvComputeType() { + return CUDNN_DATA_FLOAT; +} + +template <> +cudnnDataType_t GetConvComputeType() { + if (CudnnEnvVar::IsEnabled()) { + return CUDNN_DATA_FLOAT; + } else { + return CUDNN_DATA_HALF; + } +} + } // namespace template @@ -2098,12 +2177,8 @@ bool CudnnSupport::DoConvolveImpl( static_cast(cudnn_type)}; ScopedFilterDescriptor filter{parent_, filter_descriptor, batch_descriptor, static_cast(cudnn_type)}; - // TODO(sesse): Figure out under what circumstances cuDNN would - // accept CUDNN_DATA_HALF here; probably related to compute capability - // and cuDNN version; at least cuDNN 4 on TITAN X only supports - // CUDNN_DATA_FLOAT even for half input. 
ScopedConvolutionDescriptor conv{parent_, convolution_descriptor, - CUDNN_DATA_FLOAT}; + GetConvComputeType()}; mutex_lock lock{dnn_handle_mutex_}; auto status = wrap::cudnnSetStream(parent_, ToHandle(dnn_handle_), @@ -2424,55 +2499,6 @@ bool CudnnSupport::DoFusedConvolveImpl( #endif // CUDNN_VERSION < 6000 } -// A helper class to set env-vars and choose options for cudnn-related -// algorithms. -template -class CudnnEnvVar { - public: - static bool IsEnabled() { - static bool is_enabled = IsEnabledImpl(); - return is_enabled; - } - - private: - static bool IsEnabledImpl() { - const char* tf_env_var_val = getenv(EnvVar::kName); - if (tf_env_var_val != nullptr) { - port::StringPiece tf_env_var_val_str(tf_env_var_val); - if (tf_env_var_val_str == "0") { - return false; - } - return true; - } - return EnvVar::kDefaultFlag; - } -}; - -// A helper struct to decide whether to enable the FFT_TILING algorithms for -// forward convolution. Before cudnn v5.1 it works fine but since cudnn v5.1 -// it is turned off due to memory corruption caused by some shapes with this -// algorithm. -// Before NVIDIA fixes the memory corruption bug, users can explicitly -// enable the algorithm through an env-var "TF_ENABLE_FFT_TILING_FORWARD=1". -struct FftTilingForward { - static constexpr const char* kName = "TF_ENABLE_FFT_TILING_FORWARD"; - // TODO(yangzihao): turn the default to True when the memory corruption bug - // is fixed. - static constexpr bool kDefaultFlag = CUDNN_VERSION < 5100; -}; - -// A helper struct to decide whether to enable the WINOGRAD_NONFUSED algorithms. -// By default it is turned on, users can explicitly disable them through an -// env-var "TF_ENABLE_WINOGRAD_NONFUSED=0". -// https://github.com/tensorflow/tensorflow/pull/4901 -struct WinogradNonfused { - static constexpr const char* kName = "TF_ENABLE_WINOGRAD_NONFUSED"; - // NVIDIA has fixed winograd nonfused bug for cudnn v>=7. 
- // For cudnn v>=5.1, we have a workaround and for any lower version, we - // disable it by default. - static constexpr bool kDefaultFlag = CUDNN_VERSION >= 5100; -}; - bool CudnnSupport::GetConvolveAlgorithms( bool with_winograd_nonfused, int cc_major, int cc_minor, std::vector* out_algorithms) { @@ -2990,12 +3016,8 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( static_cast(cudnn_type)}; ScopedFilterDescriptor filter{parent_, filter_descriptor, input_descriptor, static_cast(cudnn_type)}; - // TODO(sesse): Figure out under what circumstances cuDNN would - // accept CUDNN_DATA_HALF here; probably related to compute capability - // and cuDNN version; at least cuDNN 4 on TITAN X only supports - // CUDNN_DATA_FLOAT even for half input. ScopedConvolutionDescriptor conv{parent_, convolution_descriptor, - CUDNN_DATA_FLOAT}; + GetConvComputeType()}; const bool is_profiling = output_profile_result != nullptr; cudnnConvolutionBwdDataAlgo_t algo; @@ -3245,12 +3267,8 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl( static_cast(cudnn_type)}; ScopedFilterDescriptor filter{parent_, filter_descriptor, input_descriptor, static_cast(cudnn_type)}; - // TODO(sesse): Figure out under what circumstances cuDNN would - // accept CUDNN_DATA_HALF here; probably related to compute capability - // and cuDNN version; at least cuDNN 4 on TITAN X only supports - // CUDNN_DATA_FLOAT even for half input. ScopedConvolutionDescriptor conv{parent_, convolution_descriptor, - CUDNN_DATA_FLOAT}; + GetConvComputeType()}; const bool is_profiling = output_profile_result != nullptr; cudnnConvolutionBwdFilterAlgo_t algo; @@ -3403,6 +3421,7 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl( /*beta=*/&beta, /*gradDesc=*/filter.handle(), /*gradData=*/backward_filter_data->opaque()); + if (is_profiling) { timer->Stop(AsCUDAStream(stream)); if (status == CUDNN_STATUS_SUCCESS) { -- GitLab From 30e40833147f04467b791b9faad3284504194eb8 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 10 Oct 2017 12:29:45 -0700 Subject: [PATCH 241/909] Fix bug in peephole implementation of BlockLSTM Cell. Fix tests. PiperOrigin-RevId: 171717566 --- .../rnn/python/kernel_tests/lstm_ops_test.py | 106 +++++++++++------- tensorflow/contrib/rnn/python/ops/lstm_ops.py | 36 +++--- 2 files changed, 82 insertions(+), 60 deletions(-) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py index 3016821b74..3f72203594 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py @@ -304,7 +304,7 @@ class LSTMBlockCellTest(test.TestCase): batch_size = 2 input_size = 3 cell_size = 4 - sequence_length = 5 + sequence_length = 4 inputs = [] for _ in range(sequence_length): @@ -314,38 +314,49 @@ class LSTMBlockCellTest(test.TestCase): initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=19890212) - with variable_scope.variable_scope("basic", initializer=initializer): - cell = rnn_cell.LSTMCell( - cell_size, use_peepholes=True, state_is_tuple=True) - outputs, state = rnn.static_rnn(cell, inputs, dtype=dtypes.float32) - sess.run([variables.global_variables_initializer()]) - basic_outputs, basic_state = sess.run([outputs, state[0]]) - basic_grads = sess.run(gradients_impl.gradients(outputs, inputs)) - basic_wgrads = sess.run( - gradients_impl.gradients(outputs, variables.trainable_variables())) + with variable_scope.variable_scope("test", initializer=initializer): + # magic naming so that the cells pick up these variables and resuse them + wci = variable_scope.get_variable( + "rnn/lstm_cell/w_i_diag", shape=[cell_size], dtype=dtypes.float32) + wcf = variable_scope.get_variable( + "rnn/lstm_cell/w_f_diag", shape=[cell_size], dtype=dtypes.float32) + wco = variable_scope.get_variable( + "rnn/lstm_cell/w_o_diag", shape=[cell_size], dtype=dtypes.float32) - with 
variable_scope.variable_scope("block", initializer=initializer): w = variable_scope.get_variable( - "w", + "rnn/lstm_cell/kernel", shape=[input_size + cell_size, cell_size * 4], dtype=dtypes.float32) b = variable_scope.get_variable( - "b", + "rnn/lstm_cell/bias", shape=[cell_size * 4], dtype=dtypes.float32, initializer=init_ops.zeros_initializer()) - wci = variable_scope.get_variable( - "wci", shape=[cell_size], dtype=dtypes.float32) - wcf = variable_scope.get_variable( - "wcf", shape=[cell_size], dtype=dtypes.float32) - wco = variable_scope.get_variable( - "wco", shape=[cell_size], dtype=dtypes.float32) - - _, _, _, _, _, _, outputs = block_lstm( - ops.convert_to_tensor( - sequence_length, dtype=dtypes.int64), + wci_block = variable_scope.get_variable( + "rnn/lstm_cell/lstm_block_wrapper/w_i_diag", + initializer=wci.initialized_value()) + wcf_block = variable_scope.get_variable( + "rnn/lstm_cell/lstm_block_wrapper/w_f_diag", + initializer=wcf.initialized_value()) + wco_block = variable_scope.get_variable( + "rnn/lstm_cell/lstm_block_wrapper/w_o_diag", + initializer=wco.initialized_value()) + w_block = variable_scope.get_variable( + "rnn/lstm_cell/lstm_block_wrapper/kernel", + initializer=w.initialized_value()) + b_block = variable_scope.get_variable( + "rnn/lstm_cell/lstm_block_wrapper/bias", + initializer=b.initialized_value()) + + basic_cell = rnn_cell.LSTMCell( + cell_size, use_peepholes=True, state_is_tuple=True, reuse=True) + basic_outputs_op, basic_state_op = rnn.static_rnn( + basic_cell, inputs, dtype=dtypes.float32) + + _, _, _, _, _, _, block_outputs_op = block_lstm( + ops.convert_to_tensor(sequence_length, dtype=dtypes.int64), inputs, w, b, @@ -355,36 +366,45 @@ class LSTMBlockCellTest(test.TestCase): cell_clip=0, use_peephole=True) + with variable_scope.variable_scope("rnn/lstm_cell", reuse=True): + fused_cell = lstm_ops.LSTMBlockFusedCell( + cell_size, cell_clip=0, use_peephole=True) + fused_outputs_op, fused_state_op = fused_cell( + inputs, 
dtype=dtypes.float32) + sess.run([variables.global_variables_initializer()]) - block_outputs = sess.run(outputs) - block_grads = sess.run(gradients_impl.gradients(outputs, inputs)) + basic_outputs, basic_state = sess.run( + [basic_outputs_op, basic_state_op[0]]) + basic_grads = sess.run( + gradients_impl.gradients(basic_outputs_op, inputs)) + basic_wgrads = sess.run( + gradients_impl.gradients(basic_outputs_op, [w, b, wci, wcf, wco])) + + block_outputs = sess.run(block_outputs_op) + block_grads = sess.run( + gradients_impl.gradients(block_outputs_op, inputs)) block_wgrads = sess.run( - gradients_impl.gradients(outputs, [w, b, wci, wcf, wco])) + gradients_impl.gradients(block_outputs_op, [w, b, wci, wcf, wco])) + + fused_outputs, fused_state = sess.run( + [fused_outputs_op, fused_state_op[0]]) + fused_grads = sess.run( + gradients_impl.gradients(fused_outputs_op, inputs)) + fused_wgrads = sess.run( + gradients_impl.gradients( + fused_outputs_op, + [w_block, b_block, wci_block, wcf_block, wco_block])) self.assertAllClose(basic_outputs, block_outputs) self.assertAllClose(basic_grads, block_grads) for basic, block in zip(basic_wgrads, block_wgrads): - self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2) - - with variable_scope.variable_scope("fused", initializer=initializer): - cell = lstm_ops.LSTMBlockFusedCell( - cell_size, cell_clip=0, use_peephole=True) - outputs, state = cell(inputs, dtype=dtypes.float32) - - sess.run([variables.global_variables_initializer()]) - fused_outputs, fused_state = sess.run([outputs, state[0]]) - fused_grads = sess.run(gradients_impl.gradients(outputs, inputs)) - fused_vars = [ - v for v in variables.trainable_variables() - if v.name.startswith("fused/") - ] - fused_wgrads = sess.run(gradients_impl.gradients(outputs, fused_vars)) + self.assertAllClose(basic, block, rtol=1e-6, atol=1e-6) self.assertAllClose(basic_outputs, fused_outputs) self.assertAllClose(basic_state, fused_state) self.assertAllClose(basic_grads, fused_grads) - for 
basic, fused in zip(basic_wgrads, fused_wgrads): - self.assertAllClose(basic, fused, rtol=1e-2, atol=1e-2) + for basic, fused in zip(block_wgrads, fused_wgrads): + self.assertAllClose(basic, fused, rtol=1e-6, atol=1e-6) def testLSTMFusedSequenceLengths(self): """Verify proper support for sequence lengths in LSTMBlockFusedCell.""" diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py index 352dae3acf..df910a3423 100644 --- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py +++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py @@ -116,8 +116,8 @@ def _lstm_block_cell(x, if cell_size is None: raise ValueError("cell_size from `cs_prev` should not be None.") wci = array_ops.constant(0, dtype=dtypes.float32, shape=[cell_size]) - wco = wci wcf = wci + wco = wci # pylint: disable=protected-access return gen_lstm_ops.lstm_block_cell( @@ -126,8 +126,8 @@ def _lstm_block_cell(x, h_prev=h_prev, w=w, wci=wci, - wco=wco, wcf=wcf, + wco=wco, b=b, forget_bias=forget_bias, cell_clip=cell_clip if cell_clip is not None else -1, @@ -201,8 +201,8 @@ def _block_lstm(seq_len_max, h_prev = zero_state if wci is None: wci = array_ops.constant(0, dtype=dtypes.float32, shape=[cell_size]) - wco = wci wcf = wci + wco = wci # pylint: disable=protected-access i, cs, f, o, ci, co, h = gen_lstm_ops.block_lstm( @@ -212,8 +212,8 @@ def _block_lstm(seq_len_max, h_prev=h_prev, w=w, wci=wci, - wco=wco, wcf=wcf, + wco=wco, b=b, forget_bias=forget_bias, cell_clip=cell_clip if cell_clip is not None else -1, @@ -233,7 +233,7 @@ _lstm_block_cell_grad_outputs = ["cs_prev_grad", "dicfo"] @ops.RegisterGradient("LSTMBlockCell") def _LSTMBlockCellGrad(op, *grad): """Gradient for LSTMBlockCell.""" - (x, cs_prev, h_prev, w, wci, wco, wcf, b) = op.inputs + (x, cs_prev, h_prev, w, wci, wcf, wco, b) = op.inputs (i, cs, f, o, ci, co, _) = op.outputs (_, cs_grad, _, _, _, _, h_grad) = grad @@ -293,13 +293,13 @@ def _LSTMBlockCellGrad(op, *grad): 
@ops.RegisterGradient("BlockLSTM") def _BlockLSTMGrad(op, *grad): """Gradient for BlockLSTM.""" - seq_len_max, x, cs_prev, h_prev, w, wci, wco, wcf, b = op.inputs + seq_len_max, x, cs_prev, h_prev, w, wci, wcf, wco, b = op.inputs i, cs, f, o, ci, co, h = op.outputs cs_grad = grad[1] h_grad = grad[6] - (x_grad, cs_prev_grad, h_prev_grad, w_grad, wci_grad, wco_grad, wcf_grad, + (x_grad, cs_prev_grad, h_prev_grad, w_grad, wci_grad, wcf_grad, wco_grad, b_grad) = gen_lstm_ops.block_lstm_grad( seq_len_max, x, @@ -307,8 +307,8 @@ def _BlockLSTMGrad(op, *grad): h_prev, w, wci, - wco, wcf, + wco, b, i, cs, @@ -321,8 +321,10 @@ def _BlockLSTMGrad(op, *grad): h_grad, use_peephole=op.get_attr("use_peephole")) - return [None, x_grad, cs_prev_grad, h_prev_grad, w_grad, wci_grad, wco_grad, - wcf_grad, b_grad] + return [ + None, x_grad, cs_prev_grad, h_prev_grad, w_grad, wci_grad, wcf_grad, + wco_grad, b_grad + ] class LSTMBlockCell(rnn_cell_impl.RNNCell): @@ -367,8 +369,8 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell): "W": "kernel", "b": "bias", "wci": "w_i_diag", - "wco": "w_o_diag", "wcf": "w_f_diag", + "wco": "w_o_diag", "scope": "lstm_cell" } @@ -396,10 +398,10 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell): initializer=init_ops.constant_initializer(0.0)) if self._use_peephole: wci = vs.get_variable(self._names["wci"], [self._num_units]) - wco = vs.get_variable(self._names["wco"], [self._num_units]) wcf = vs.get_variable(self._names["wcf"], [self._num_units]) + wco = vs.get_variable(self._names["wco"], [self._num_units]) else: - wci = wco = wcf = array_ops.zeros([self._num_units]) + wci = wcf = wco = array_ops.zeros([self._num_units]) (cs_prev, h_prev) = states_prev (_, cs, _, _, _, _, h) = _lstm_block_cell( x, @@ -408,8 +410,8 @@ class LSTMBlockCell(rnn_cell_impl.RNNCell): w, b, wci=wci, - wco=wco, wcf=wcf, + wco=wco, forget_bias=self._forget_bias, cell_clip=self._cell_clip, use_peephole=self._use_peephole) @@ -644,10 +646,10 @@ class LSTMBlockFusedCell(LSTMBlockWrapper): 
dtype=dtype) if self._use_peephole: wci = vs.get_variable("w_i_diag", [self._num_units], dtype=dtype) - wco = vs.get_variable("w_o_diag", [self._num_units], dtype=dtype) wcf = vs.get_variable("w_f_diag", [self._num_units], dtype=dtype) + wco = vs.get_variable("w_o_diag", [self._num_units], dtype=dtype) else: - wci = wco = wcf = array_ops.zeros([self._num_units], dtype=dtype) + wci = wcf = wco = array_ops.zeros([self._num_units], dtype=dtype) if sequence_length is None: max_seq_len = math_ops.to_int64(time_len) @@ -661,8 +663,8 @@ class LSTMBlockFusedCell(LSTMBlockWrapper): h_prev=initial_output, w=w, wci=wci, - wco=wco, wcf=wcf, + wco=wco, b=b, forget_bias=self._forget_bias, cell_clip=self._cell_clip, -- GitLab From e74adb670920dd6f41306a4a40784a535ea7b878 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Tue, 10 Oct 2017 12:33:27 -0700 Subject: [PATCH 242/909] Fix S3 BUILD not including files explicitly. This causes remote builds to fail since the AWS headers were missing. PiperOrigin-RevId: 171718021 --- third_party/aws.BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/third_party/aws.BUILD b/third_party/aws.BUILD index 858a55ee07..38b7e0e543 100644 --- a/third_party/aws.BUILD +++ b/third_party/aws.BUILD @@ -19,6 +19,7 @@ cc_library( "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]), }) + glob([ + "aws-cpp-sdk-core/include/**/*.h", "aws-cpp-sdk-core/source/*.cpp", "aws-cpp-sdk-core/source/auth/**/*.cpp", "aws-cpp-sdk-core/source/config/**/*.cpp", @@ -38,6 +39,7 @@ cc_library( "aws-cpp-sdk-core/source/utils/xml/**/*.cpp", "aws-cpp-sdk-core/source/utils/crypto/*.cpp", "aws-cpp-sdk-core/source/utils/crypto/factory/**/*.cpp", + "aws-cpp-sdk-s3/include/**/*.h", "aws-cpp-sdk-s3/source/**/*.cpp", ]), hdrs = [ -- GitLab From 0ffb522f02129c5d23a8b20ef56d0fefd7be91fe Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Tue, 10 Oct 2017 13:06:16 -0700 Subject: [PATCH 243/909] Add a flag to erase "_noinline" attribute to allow total inlining in Grappler. 
PiperOrigin-RevId: 171722354 --- .../core/grappler/grappler_item_builder.cc | 26 ++++++++++++------- .../core/grappler/grappler_item_builder.h | 20 +++++++------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index cb7d7f7330..d23facf81a 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -74,7 +74,7 @@ void InitializeTensor(DataType type, Tensor* tensor) { // of the cluster type (E.g: single cpu, multiple gpu, etc) being simulated in // order to get the correct session options and environment, and performing the // correct optimizations. -Status OptimizeGraph(const GraphDef& graph_def, GraphDef* output_graph_def, +Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def, const ItemConfig& cfg) { if (!cfg.apply_optimizations && !cfg.inline_functions) { return Status::OK(); @@ -83,8 +83,16 @@ Status OptimizeGraph(const GraphDef& graph_def, GraphDef* output_graph_def, // Create a session option for a single GPU device. SessionOptions options; - // Inline all functions. - GraphDef inlined_graph_def(graph_def); + // Make a local copy of graph def, because we need to change some things. + GraphDef graph_def(graph_def_arg); + + if (cfg.inline_functions && cfg.erase_noinline_attributes) { + // TF optimizer doesn't inline functions with "_noinline" attribute, + // so let's go over the function library and erase it. + for (auto& func : *graph_def.mutable_library()->mutable_function()) { + func.mutable_attr()->erase("_noinline"); + } + } // Instantiate all variables for function library runtime creation. 
std::vector devices; @@ -92,7 +100,7 @@ Status OptimizeGraph(const GraphDef& graph_def, GraphDef* output_graph_def, options, "/job:localhost/replica:0/task:0", &devices)); std::unique_ptr dvc_mgr(new DeviceMgr(devices)); FunctionLibraryDefinition function_library(OpRegistry::Global(), - inlined_graph_def.library()); + graph_def.library()); Env* env = Env::Default(); // Optimizer options: L1 and inlining. L1 is default. @@ -108,7 +116,7 @@ Status OptimizeGraph(const GraphDef& graph_def, GraphDef* output_graph_def, // Create the function library runtime. std::unique_ptr pflr( new ProcessFunctionLibraryRuntime(dvc_mgr.get(), env, - inlined_graph_def.versions().producer(), + graph_def.versions().producer(), &function_library, *optimizer_opts)); FunctionLibraryRuntime* flr = pflr->GetFLR(devices[0]->name()); @@ -118,11 +126,11 @@ Status OptimizeGraph(const GraphDef& graph_def, GraphDef* output_graph_def, graph_ctor_opts.expect_device_spec = false; std::unique_ptr graphptr(new Graph(function_library)); // Populate default attrs to the NodeDefs in the GraphDef. - TF_RETURN_IF_ERROR(AddDefaultAttrsToGraphDef(&inlined_graph_def, - *graphptr->op_registry(), 0)); + TF_RETURN_IF_ERROR( + AddDefaultAttrsToGraphDef(&graph_def, *graphptr->op_registry(), 0)); - TF_RETURN_IF_ERROR(ConvertGraphDefToGraph(graph_ctor_opts, inlined_graph_def, - graphptr.get())); + TF_RETURN_IF_ERROR( + ConvertGraphDefToGraph(graph_ctor_opts, graph_def, graphptr.get())); // Optimize the graph. 
GraphOptimizer optimizer(*optimizer_opts); diff --git a/tensorflow/core/grappler/grappler_item_builder.h b/tensorflow/core/grappler/grappler_item_builder.h index 4ce5055e7a..9a7f52228b 100644 --- a/tensorflow/core/grappler/grappler_item_builder.h +++ b/tensorflow/core/grappler/grappler_item_builder.h @@ -27,24 +27,22 @@ class MetaGraphDef; namespace grappler { struct ItemConfig { - ItemConfig() - : ignore_user_placement(true), - ignore_colocation(true), - placeholder_unknown_output_shape_dim(-1), - apply_optimizations(false), - inline_functions(false) {} + ItemConfig() {} // If true, ignore all user specified node placement. - bool ignore_user_placement; + bool ignore_user_placement = true; // If true, ignore all user specified colocation attributes. - bool ignore_colocation; + bool ignore_colocation = true; // Dimension to use if a placeholder node has an _output_shapes attribute with // a dimension of -1. - int placeholder_unknown_output_shape_dim; + int placeholder_unknown_output_shape_dim = -1; // If true, does L1 optimizations. - bool apply_optimizations; + bool apply_optimizations = false; // If true, does inlining. - bool inline_functions; + bool inline_functions = false; + // If true, erases all "_noinline" attributes from user-defined functions. + // Has no effect if "inline_functions" is disabled. + bool erase_noinline_attributes = false; // If non-empty, override the directory of asset paths. string assets_directory_override; }; -- GitLab From 3f4c6ccadf51475050549d4d3445e75869768aac Mon Sep 17 00:00:00 2001 From: Anna R Date: Tue, 10 Oct 2017 14:11:27 -0700 Subject: [PATCH 244/909] Internal change. 
PiperOrigin-RevId: 171731884 --- tensorflow/contrib/estimator/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index 596f68844b..3b61afe45e 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -50,7 +50,10 @@ py_test( size = "small", srcs = ["python/estimator/dnn_test.py"], srcs_version = "PY2AND3", - tags = ["no_pip"], + tags = [ + "no_pip", + "notsan", + ], deps = [ ":dnn", ":head", -- GitLab From 23418e4317b9e2c4a5148368daec873592a0de9e Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 10 Oct 2017 14:16:21 -0700 Subject: [PATCH 245/909] Move LinearOperator to tf.linalg (with backwards compatibility support in contrib.linalg.) PiperOrigin-RevId: 171732711 --- tensorflow/BUILD | 2 + tensorflow/contrib/cmake/tf_python.cmake | 2 + .../bijectors/affine_linear_operator_test.py | 2 +- .../kernel_tests/distribution_util_test.py | 2 +- .../kernel_tests/vector_diffeomixture_test.py | 4 +- .../python/ops/bijectors/affine_impl.py | 2 +- .../bijectors/affine_linear_operator_impl.py | 6 +- .../python/ops/distribution_util.py | 2 +- .../python/ops/mvn_diag_plus_low_rank.py | 2 +- .../python/ops/mvn_full_covariance.py | 4 +- .../python/ops/mvn_linear_operator.py | 14 +- .../distributions/python/ops/mvn_tril.py | 6 +- .../python/ops/vector_diffeomixture.py | 14 +- .../python/ops/vector_exponential_diag.py | 2 +- .../ops/vector_exponential_linear_operator.py | 8 +- .../ops/vector_laplace_linear_operator.py | 14 +- .../distributions/python/ops/wishart.py | 12 +- tensorflow/contrib/linalg/BUILD | 165 ++---------------- tensorflow/contrib/linalg/__init__.py | 18 +- .../linear_operator_addition_test.py | 27 +-- .../python/ops/linear_operator_addition.py | 15 +- .../api_guides/python/contrib.linalg.md | 4 +- tensorflow/python/BUILD | 18 +- tensorflow/python/__init__.py | 2 +- tensorflow/python/kernel_tests/BUILD | 2 +- 
tensorflow/python/kernel_tests/linalg/BUILD | 149 ++++++++++++++++ .../python/kernel_tests/linalg/__init__.py | 18 ++ .../linear_operator_composition_test.py | 4 +- .../linalg}/linear_operator_diag_test.py | 4 +- .../linear_operator_full_matrix_test.py | 4 +- .../linalg}/linear_operator_identity_test.py | 4 +- .../linear_operator_low_rank_update_test.py} | 49 +++--- .../linear_operator_lower_triangular_test.py} | 16 +- .../linalg}/linear_operator_test.py | 3 +- .../linalg}/linear_operator_util_test.py | 4 +- .../python/kernel_tests/linalg_ops_test.py | 2 +- tensorflow/python/ops/distributions/util.py | 4 +- tensorflow/python/ops/linalg/BUILD | 38 ++++ .../python/ops/{ => linalg}/__init__.py | 0 .../ops/{linalg_ns.py => linalg/linalg.py} | 14 +- .../python/ops/{ => linalg}/linalg_impl.py | 0 .../ops/linalg}/linear_operator.py | 6 +- .../linalg}/linear_operator_composition.py | 2 +- .../ops/linalg}/linear_operator_diag.py | 4 +- .../linalg}/linear_operator_full_matrix.py | 2 +- .../ops/linalg}/linear_operator_identity.py | 4 +- .../linear_operator_low_rank_update.py} | 30 ++-- .../linear_operator_lower_triangular.py} | 27 +-- .../ops/linalg}/linear_operator_test_util.py | 6 +- .../ops/linalg}/linear_operator_util.py | 0 ...r-operator-composition.__metaclass__.pbtxt | 14 ++ ....linalg.-linear-operator-composition.pbtxt | 134 ++++++++++++++ ....-linear-operator-diag.__metaclass__.pbtxt | 14 ++ ...sorflow.linalg.-linear-operator-diag.pbtxt | 134 ++++++++++++++ ...r-operator-full-matrix.__metaclass__.pbtxt | 14 ++ ....linalg.-linear-operator-full-matrix.pbtxt | 130 ++++++++++++++ ...near-operator-identity.__metaclass__.pbtxt | 14 ++ ...low.linalg.-linear-operator-identity.pbtxt | 131 ++++++++++++++ ...erator-low-rank-update.__metaclass__.pbtxt | 14 ++ ...alg.-linear-operator-low-rank-update.pbtxt | 154 ++++++++++++++++ ...rator-lower-triangular.__metaclass__.pbtxt | 14 ++ ...lg.-linear-operator-lower-triangular.pbtxt | 130 ++++++++++++++ 
...erator-scaled-identity.__metaclass__.pbtxt | 14 ++ ...alg.-linear-operator-scaled-identity.pbtxt | 135 ++++++++++++++ ...inalg.-linear-operator.__metaclass__.pbtxt | 14 ++ .../tensorflow.linalg.-linear-operator.pbtxt | 129 ++++++++++++++ .../tools/api/golden/tensorflow.linalg.pbtxt | 32 ++++ 67 files changed, 1631 insertions(+), 333 deletions(-) create mode 100644 tensorflow/python/kernel_tests/linalg/BUILD create mode 100644 tensorflow/python/kernel_tests/linalg/__init__.py rename tensorflow/{contrib/linalg/python/kernel_tests => python/kernel_tests/linalg}/linear_operator_composition_test.py (98%) rename tensorflow/{contrib/linalg/python/kernel_tests => python/kernel_tests/linalg}/linear_operator_diag_test.py (97%) rename tensorflow/{contrib/linalg/python/kernel_tests => python/kernel_tests/linalg}/linear_operator_full_matrix_test.py (98%) rename tensorflow/{contrib/linalg/python/kernel_tests => python/kernel_tests/linalg}/linear_operator_identity_test.py (99%) rename tensorflow/{contrib/linalg/python/kernel_tests/linear_operator_udvh_update_test.py => python/kernel_tests/linalg/linear_operator_low_rank_update_test.py} (88%) rename tensorflow/{contrib/linalg/python/kernel_tests/linear_operator_tril_test.py => python/kernel_tests/linalg/linear_operator_lower_triangular_test.py} (86%) rename tensorflow/{contrib/linalg/python/kernel_tests => python/kernel_tests/linalg}/linear_operator_test.py (99%) rename tensorflow/{contrib/linalg/python/kernel_tests => python/kernel_tests/linalg}/linear_operator_util_test.py (98%) create mode 100644 tensorflow/python/ops/linalg/BUILD rename tensorflow/python/ops/{ => linalg}/__init__.py (100%) rename tensorflow/python/ops/{linalg_ns.py => linalg/linalg.py} (78%) rename tensorflow/python/ops/{ => linalg}/linalg_impl.py (100%) rename tensorflow/{contrib/linalg/python/ops => python/ops/linalg}/linear_operator.py (99%) rename tensorflow/{contrib/linalg/python/ops => python/ops/linalg}/linear_operator_composition.py (99%) rename 
tensorflow/{contrib/linalg/python/ops => python/ops/linalg}/linear_operator_diag.py (98%) rename tensorflow/{contrib/linalg/python/ops => python/ops/linalg}/linear_operator_full_matrix.py (98%) rename tensorflow/{contrib/linalg/python/ops => python/ops/linalg}/linear_operator_identity.py (99%) rename tensorflow/{contrib/linalg/python/ops/linear_operator_udvh_update.py => python/ops/linalg/linear_operator_low_rank_update.py} (95%) rename tensorflow/{contrib/linalg/python/ops/linear_operator_tril.py => python/ops/linalg/linear_operator_lower_triangular.py} (90%) rename tensorflow/{contrib/linalg/python/ops => python/ops/linalg}/linear_operator_test_util.py (99%) rename tensorflow/{contrib/linalg/python/ops => python/ops/linalg}/linear_operator_util.py (100%) create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-composition.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-composition.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-diag.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-diag.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-full-matrix.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-full-matrix.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-identity.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-identity.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-low-rank-update.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-low-rank-update.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-lower-triangular.__metaclass__.pbtxt create mode 100644 
tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-lower-triangular.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-scaled-identity.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-scaled-identity.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator.__metaclass__.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator.pbtxt diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 1620bb5f2a..5bb31d7df1 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -490,7 +490,9 @@ filegroup( "//tensorflow/python/keras:all_files", "//tensorflow/python/kernel_tests:all_files", "//tensorflow/python/kernel_tests/distributions:all_files", + "//tensorflow/python/kernel_tests/linalg:all_files", "//tensorflow/python/ops/distributions:all_files", + "//tensorflow/python/ops/linalg:all_files", "//tensorflow/python/profiler:all_files", "//tensorflow/python/profiler/internal:all_files", "//tensorflow/python/saved_model:all_files", diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index bb3e69d53c..883b36b3fb 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -266,12 +266,14 @@ add_python_module("tensorflow/python/keras/_impl/keras/utils") add_python_module("tensorflow/python/keras/_impl/keras/wrappers") add_python_module("tensorflow/python/kernel_tests") add_python_module("tensorflow/python/kernel_tests/distributions") +add_python_module("tensorflow/python/kernel_tests/linalg") add_python_module("tensorflow/python/layers") add_python_module("tensorflow/python/lib") add_python_module("tensorflow/python/lib/core") add_python_module("tensorflow/python/lib/io") add_python_module("tensorflow/python/ops") add_python_module("tensorflow/python/ops/distributions") +add_python_module("tensorflow/python/ops/linalg") 
add_python_module("tensorflow/python/ops/losses") add_python_module("tensorflow/python/platform") add_python_module("tensorflow/python/platform/default") diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_linear_operator_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_linear_operator_test.py index 0738754b21..405ddd292c 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_linear_operator_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_linear_operator_test.py @@ -72,7 +72,7 @@ class AffineLinearOperatorTest(test.TestCase): [3, -2, 0], [4, 3, 2]]], dtype=np.float32) - scale = linalg.LinearOperatorTriL(tril, is_non_singular=True) + scale = linalg.LinearOperatorLowerTriangular(tril, is_non_singular=True) affine = AffineLinearOperator( shift=shift, scale=scale, validate_args=True) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py b/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py index d10312d667..2d74aa1f32 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py @@ -23,11 +23,11 @@ import itertools import numpy as np from tensorflow.contrib.distributions.python.ops import distribution_util -from tensorflow.contrib.linalg.python.ops import linear_operator_diag from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops.linalg import linear_operator_diag import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test diff --git a/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py 
b/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py index 070ee61be3..aea4d42503 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/vector_diffeomixture_test.py @@ -22,9 +22,9 @@ import numpy as np from tensorflow.contrib.distributions.python.ops import test_util from tensorflow.contrib.distributions.python.ops import vector_diffeomixture as vector_diffeomixture_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_diag as linop_diag_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_identity as linop_identity_lib from tensorflow.python.ops.distributions import normal as normal_lib +from tensorflow.python.ops.linalg import linear_operator_diag as linop_diag_lib +from tensorflow.python.ops.linalg import linear_operator_identity as linop_identity_lib from tensorflow.python.platform import test diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py index f74d699a43..05bb9c2f9b 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py @@ -326,7 +326,7 @@ class Affine(bijector.Bijector): shape_hint=shape_hint) if perturb_factor is not None: - return linalg.LinearOperatorUDVHUpdate( + return linalg.LinearOperatorLowRankUpdate( scale, u=perturb_factor, diag_update=perturb_diag, diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py index ae380b5cb2..89043b1410 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine_linear_operator_impl.py @@ -19,7 +19,6 @@ from __future__ 
import division from __future__ import print_function from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape -from tensorflow.contrib.linalg.python.ops import linear_operator from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -27,6 +26,7 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops.distributions import bijector +from tensorflow.python.ops.linalg import linear_operator __all__ = [ @@ -66,7 +66,7 @@ class AffineLinearOperator(bijector.Bijector): Example Use: ```python - linalg = tf.contrib.linalg + linalg = tf.linalg x = [1., 2, 3] @@ -82,7 +82,7 @@ class AffineLinearOperator(bijector.Bijector): tril = [[1., 0, 0], [2, 1, 0], [3, 2, 1]] - scale = linalg.LinearOperatorTriL(tril) + scale = linalg.LinearOperatorLowerTriangular(tril) affine = AffineLinearOperator(shift, scale) # In this case, `forward` is equivalent to: # np.squeeze(np.matmul(tril, np.expand_dims(x, -1)), -1) + shift diff --git a/tensorflow/contrib/distributions/python/ops/distribution_util.py b/tensorflow/contrib/distributions/python/ops/distribution_util.py index 3ed5592bf9..869b5698e5 100644 --- a/tensorflow/contrib/distributions/python/ops/distribution_util.py +++ b/tensorflow/contrib/distributions/python/ops/distribution_util.py @@ -160,7 +160,7 @@ def make_tril_scale( scale_tril = array_ops.matrix_set_diag(scale_tril, tril_diag) - return linalg.LinearOperatorTriL( + return linalg.LinearOperatorLowerTriangular( tril=_maybe_attach_assertion(scale_tril), is_non_singular=True, is_self_adjoint=False, diff --git a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py index ee3e02e020..040bc23072 100644 --- 
a/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_diag_plus_low_rank.py @@ -237,7 +237,7 @@ class MultivariateNormalDiagPlusLowRank( scale_perturb_diag, name="scale_perturb_diag") if has_low_rank: - scale = linalg.LinearOperatorUDVHUpdate( + scale = linalg.LinearOperatorLowRankUpdate( scale, u=scale_perturb_factor, diag_update=scale_perturb_diag, diff --git a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py index 221eed547b..f9952b2069 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_full_covariance.py @@ -174,8 +174,8 @@ class MultivariateNormalFullCovariance(mvn_tril.MultivariateNormalTriL): covariance_matrix = control_flow_ops.with_dependencies( [assert_symmetric], covariance_matrix) # No need to validate that covariance_matrix is non-singular. - # LinearOperatorTriL has an assert_non_singular method that is called - # by the Bijector. + # LinearOperatorLowerTriangular has an assert_non_singular method that + # is called by the Bijector. # However, cholesky() ignores the upper triangular part, so we do need # to separately assert symmetric. 
scale_tril = linalg_ops.cholesky(covariance_matrix) diff --git a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py index 50c7ba418b..251c2dbdfa 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_linear_operator.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib import linalg from tensorflow.contrib.distributions.python.ops import distribution_util from tensorflow.contrib.distributions.python.ops.bijectors import AffineLinearOperator from tensorflow.python.framework import ops @@ -28,6 +27,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import kullback_leibler from tensorflow.python.ops.distributions import normal from tensorflow.python.ops.distributions import transformed_distribution +from tensorflow.python.ops.linalg import linalg __all__ = [ @@ -92,7 +92,7 @@ class MultivariateNormalLinearOperator( ```python ds = tf.contrib.distributions - la = tf.contrib.linalg + la = tf.linalg # Initialize a single 3-variate Gaussian. mu = [1., 2, 3] @@ -106,7 +106,7 @@ class MultivariateNormalLinearOperator( mvn = ds.MultivariateNormalLinearOperator( loc=mu, - scale=la.LinearOperatorTriL(scale)) + scale=la.LinearOperatorLowerTriangular(scale)) # Covariance agrees with cholesky(cov) parameterization. 
mvn.covariance().eval() @@ -243,8 +243,8 @@ class MultivariateNormalLinearOperator( def _variance(self): if distribution_util.is_diagonal_scale(self.scale): return math_ops.square(self.scale.diag_part()) - elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate) - and self.scale.is_self_adjoint): + elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and + self.scale.is_self_adjoint): return array_ops.matrix_diag_part( self.scale.matmul(self.scale.to_dense())) else: @@ -254,8 +254,8 @@ class MultivariateNormalLinearOperator( def _stddev(self): if distribution_util.is_diagonal_scale(self.scale): return math_ops.abs(self.scale.diag_part()) - elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate) - and self.scale.is_self_adjoint): + elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and + self.scale.is_self_adjoint): return math_ops.sqrt(array_ops.matrix_diag_part( self.scale.matmul(self.scale.to_dense()))) else: diff --git a/tensorflow/contrib/distributions/python/ops/mvn_tril.py b/tensorflow/contrib/distributions/python/ops/mvn_tril.py index 48c4dddc81..e3d68f6b4c 100644 --- a/tensorflow/contrib/distributions/python/ops/mvn_tril.py +++ b/tensorflow/contrib/distributions/python/ops/mvn_tril.py @@ -188,9 +188,9 @@ class MultivariateNormalTriL( assert_proper_shapes=validate_args) else: # No need to validate that scale_tril is non-singular. - # LinearOperatorTriL has an assert_non_singular method that is called - # by the Bijector. - scale = linalg.LinearOperatorTriL( + # LinearOperatorLowerTriangular has an assert_non_singular + # method that is called by the Bijector. 
+ scale = linalg.LinearOperatorLowerTriangular( scale_tril, is_non_singular=True, is_self_adjoint=False, diff --git a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py index 6d297ea1f1..438d628da4 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py +++ b/tensorflow/contrib/distributions/python/ops/vector_diffeomixture.py @@ -23,10 +23,6 @@ import numpy as np from tensorflow.contrib.distributions.python.ops import distribution_util from tensorflow.contrib.distributions.python.ops.bijectors.affine_linear_operator import AffineLinearOperator from tensorflow.contrib.linalg.python.ops import linear_operator_addition as linop_add_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_diag as linop_diag_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_full_matrix as linop_full_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_identity as linop_identity_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_tril as linop_tril_lib from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -37,6 +33,10 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import categorical as categorical_lib from tensorflow.python.ops.distributions import distribution as distribution_lib +from tensorflow.python.ops.linalg import linear_operator_diag as linop_diag_lib +from tensorflow.python.ops.linalg import linear_operator_full_matrix as linop_full_lib +from tensorflow.python.ops.linalg import linear_operator_identity as linop_identity_lib +from tensorflow.python.ops.linalg import linear_operator_lower_triangular as linop_tril_lib static_value = distribution_util.static_value @@ -185,7 +185,7 @@ class 
VectorDiffeomixture(distribution_lib.Distribution): ```python ds = tf.contrib.distributions - la = tf.contrib.linalg + la = tf.linalg # Create two batches of VectorDiffeomixtures, one with mix_loc=[0.] and # another with mix_loc=[1]. In both cases, `K=2` and the affine @@ -772,8 +772,8 @@ def linop_scale(w, op): is_non_singular=op.is_non_singular, is_self_adjoint=op.is_self_adjoint, is_positive_definite=op.is_positive_definite) - if isinstance(op, linop_tril_lib.LinearOperatorTriL): - return linop_tril_lib.LinearOperatorTriL( + if isinstance(op, linop_tril_lib.LinearOperatorLowerTriangular): + return linop_tril_lib.LinearOperatorLowerTriangular( tril=w[..., array_ops.newaxis, array_ops.newaxis] * op.to_dense(), is_non_singular=op.is_non_singular, is_self_adjoint=op.is_self_adjoint, diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py index c88572e17f..356d78b67a 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_diag.py @@ -90,7 +90,7 @@ class VectorExponentialDiag( ```python ds = tf.contrib.distributions - la = tf.contrib.linalg + la = tf.linalg # Initialize a single 2-variate VectorExponential, supported on # {(x, y) in R^2 : x > 0, y > 0}. 
diff --git a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py index 7123165417..b313a851b3 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/vector_exponential_linear_operator.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib import linalg from tensorflow.contrib.distributions.python.ops import bijectors from tensorflow.contrib.distributions.python.ops import distribution_util from tensorflow.python.framework import ops @@ -26,6 +25,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import exponential from tensorflow.python.ops.distributions import transformed_distribution +from tensorflow.python.ops.linalg import linalg __all__ = ["VectorExponentialLinearOperator"] @@ -108,7 +108,7 @@ class VectorExponentialLinearOperator( ```python ds = tf.contrib.distributions - la = tf.contrib.linalg + la = tf.linalg # Initialize a single 2-variate VectorExponential, supported on # {(x, y) in R^2 : x > 0, y > 0}. 
@@ -247,7 +247,7 @@ class VectorExponentialLinearOperator( def _variance(self): if distribution_util.is_diagonal_scale(self.scale): return math_ops.square(self.scale.diag_part()) - elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate) and + elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and self.scale.is_self_adjoint): return array_ops.matrix_diag_part( self.scale.matmul(self.scale.to_dense())) @@ -258,7 +258,7 @@ class VectorExponentialLinearOperator( def _stddev(self): if distribution_util.is_diagonal_scale(self.scale): return math_ops.abs(self.scale.diag_part()) - elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate) and + elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and self.scale.is_self_adjoint): return math_ops.sqrt( array_ops.matrix_diag_part(self.scale.matmul(self.scale.to_dense()))) diff --git a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py index fdee57695e..c7abdbb4ca 100644 --- a/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py +++ b/tensorflow/contrib/distributions/python/ops/vector_laplace_linear_operator.py @@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib import linalg from tensorflow.contrib.distributions.python.ops import bijectors from tensorflow.contrib.distributions.python.ops import distribution_util from tensorflow.python.framework import ops @@ -28,6 +27,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import laplace from tensorflow.python.ops.distributions import transformed_distribution +from tensorflow.python.ops.linalg import linalg __all__ = [ @@ -110,7 +110,7 @@ class VectorLaplaceLinearOperator( ```python ds = tf.contrib.distributions - la = tf.contrib.linalg + la = tf.linalg # Initialize a single 3-variate 
VectorLaplace with some desired covariance. mu = [1., 2, 3] @@ -126,7 +126,7 @@ class VectorLaplaceLinearOperator( # Divide scale by sqrt(2) so that the final covariance will be what we want. vla = ds.VectorLaplaceLinearOperator( loc=mu, - scale=la.LinearOperatorTriL(scale / tf.sqrt(2))) + scale=la.LinearOperatorLowerTriangular(scale / tf.sqrt(2))) # Covariance agrees with cholesky(cov) parameterization. vla.covariance().eval() @@ -271,8 +271,8 @@ class VectorLaplaceLinearOperator( def _variance(self): if distribution_util.is_diagonal_scale(self.scale): return 2. * math_ops.square(self.scale.diag_part()) - elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate) - and self.scale.is_self_adjoint): + elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and + self.scale.is_self_adjoint): return array_ops.matrix_diag_part( 2. * self.scale.matmul(self.scale.to_dense())) else: @@ -282,8 +282,8 @@ class VectorLaplaceLinearOperator( def _stddev(self): if distribution_util.is_diagonal_scale(self.scale): return np.sqrt(2) * math_ops.abs(self.scale.diag_part()) - elif (isinstance(self.scale, linalg.LinearOperatorUDVHUpdate) - and self.scale.is_self_adjoint): + elif (isinstance(self.scale, linalg.LinearOperatorLowRankUpdate) and + self.scale.is_self_adjoint): return np.sqrt(2) * math_ops.sqrt(array_ops.matrix_diag_part( self.scale.matmul(self.scale.to_dense()))) else: diff --git a/tensorflow/contrib/distributions/python/ops/wishart.py b/tensorflow/contrib/distributions/python/ops/wishart.py index 9d30ce6719..e4ac65012b 100644 --- a/tensorflow/contrib/distributions/python/ops/wishart.py +++ b/tensorflow/contrib/distributions/python/ops/wishart.py @@ -251,8 +251,8 @@ class _WishartLinearOperator(distribution.Distribution): # Complexity: O(nbM) where M is the complexity of the operator solving a # vector system. E.g., for LinearOperatorDiag, each matmul is O(k**2), so - # this complexity is O(nbk**2). 
For LinearOperatorTriL, each matmul is - # O(k^3) so this step has complexity O(nbk^3). + # this complexity is O(nbk**2). For LinearOperatorLowerTriangular, + # each matmul is O(k^3) so this step has complexity O(nbk^3). x = self.scale_operator.matmul(x) # Undo make batch-op ready. @@ -307,8 +307,8 @@ class _WishartLinearOperator(distribution.Distribution): # Complexity: O(nbM*k) where M is the complexity of the operator solving # a vector system. E.g., for LinearOperatorDiag, each solve is O(k), so - # this complexity is O(nbk**2). For LinearOperatorTriL, each solve is - # O(k**2) so this step has complexity O(nbk^3). + # this complexity is O(nbk**2). For LinearOperatorLowerTriangular, + # each solve is O(k**2) so this step has complexity O(nbk^3). scale_sqrt_inv_x_sqrt = self.scale_operator.solve( scale_sqrt_inv_x_sqrt) @@ -544,7 +544,7 @@ class WishartCholesky(_WishartLinearOperator): super(WishartCholesky, self).__init__( df=df, - scale_operator=linalg.LinearOperatorTriL( + scale_operator=linalg.LinearOperatorLowerTriangular( tril=scale, is_non_singular=True, is_positive_definite=True, @@ -655,7 +655,7 @@ class WishartFull(_WishartLinearOperator): ] if validate_args else [], chol) super(WishartFull, self).__init__( df=df, - scale_operator=linalg.LinearOperatorTriL( + scale_operator=linalg.LinearOperatorLowerTriangular( tril=chol, is_non_singular=True, is_positive_definite=True, diff --git a/tensorflow/contrib/linalg/BUILD b/tensorflow/contrib/linalg/BUILD index 810a3d34ee..734bac17dc 100644 --- a/tensorflow/contrib/linalg/BUILD +++ b/tensorflow/contrib/linalg/BUILD @@ -10,152 +10,7 @@ exports_files(["LICENSE"]) package(default_visibility = ["//tensorflow:__subpackages__"]) -load("//tensorflow:tensorflow.bzl", "cuda_py_tests") - -cuda_py_tests( - name = "linear_operator_test", - size = "small", - srcs = ["python/kernel_tests/linear_operator_test.py"], - additional_deps = [ - ":linalg_py", - "//third_party/py/numpy", - "//tensorflow/python:array_ops", - 
"//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - ], -) - -cuda_py_tests( - name = "linear_operator_addition_test", - size = "small", - srcs = ["python/kernel_tests/linear_operator_addition_test.py"], - additional_deps = [ - ":linalg_py", - "//third_party/py/numpy", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - ], -) - -cuda_py_tests( - name = "linear_operator_composition_test", - size = "medium", - srcs = ["python/kernel_tests/linear_operator_composition_test.py"], - additional_deps = [ - ":linalg_py", - "//third_party/py/numpy", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - ], - tags = ["noasan"], # times out b/63678675 -) - -cuda_py_tests( - name = "linear_operator_diag_test", - size = "medium", - srcs = ["python/kernel_tests/linear_operator_diag_test.py"], - additional_deps = [ - ":linalg_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - "//tensorflow/python:random_ops", - ], -) - -cuda_py_tests( - name = "linear_operator_identity_test", - size = "medium", - srcs = ["python/kernel_tests/linear_operator_identity_test.py"], - additional_deps = [ - ":linalg_py", - "//tensorflow/python:array_ops", - 
"//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:platform_test", - "//tensorflow/python:random_ops", - ], -) - -cuda_py_tests( - name = "linear_operator_full_matrix_test", - size = "medium", - srcs = ["python/kernel_tests/linear_operator_full_matrix_test.py"], - additional_deps = [ - ":linalg_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:platform_test", - ], -) - -cuda_py_tests( - name = "linear_operator_tril_test", - size = "medium", - srcs = ["python/kernel_tests/linear_operator_tril_test.py"], - additional_deps = [ - ":linalg_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:platform_test", - ], -) - -cuda_py_tests( - name = "linear_operator_udvh_update_test", - size = "medium", - srcs = ["python/kernel_tests/linear_operator_udvh_update_test.py"], - additional_deps = [ - ":linalg_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:platform_test", - ], - shard_count = 5, -) - -cuda_py_tests( - name = "linear_operator_util_test", - size = "medium", - srcs = ["python/kernel_tests/linear_operator_util_test.py"], - additional_deps = [ - ":linalg_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - 
"//tensorflow/python:platform_test", - ], -) +load("//tensorflow:tensorflow.bzl", "cuda_py_test") py_library( name = "linalg_py", @@ -176,11 +31,29 @@ py_library( "//tensorflow/python:random_seed", "//tensorflow/python:tensor_util", "//tensorflow/python:util", + "//tensorflow/python/ops/linalg", "//third_party/py/numpy", "@six_archive//:six", ], ) +cuda_py_test( + name = "linear_operator_addition_test", + size = "small", + srcs = ["python/kernel_tests/linear_operator_addition_test.py"], + additional_deps = [ + ":linalg_py", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/linalg/__init__.py b/tensorflow/contrib/linalg/__init__.py index 44421a6b7d..4720692c33 100644 --- a/tensorflow/contrib/linalg/__init__.py +++ b/tensorflow/contrib/linalg/__init__.py @@ -21,8 +21,8 @@ See the @{$python/contrib.linalg} guide. 
@@LinearOperatorIdentity @@LinearOperatorScaledIdentity @@LinearOperatorFullMatrix -@@LinearOperatorTriL -@@LinearOperatorUDVHUpdate +@@LinearOperatorLowerTriangular +@@LinearOperatorLowRankUpdate @@LinearOperatorComposition @@add_operators @@ -33,14 +33,14 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member -from tensorflow.contrib.linalg.python.ops.linear_operator import * from tensorflow.contrib.linalg.python.ops.linear_operator_addition import * -from tensorflow.contrib.linalg.python.ops.linear_operator_composition import * -from tensorflow.contrib.linalg.python.ops.linear_operator_diag import * -from tensorflow.contrib.linalg.python.ops.linear_operator_full_matrix import * -from tensorflow.contrib.linalg.python.ops.linear_operator_identity import * -from tensorflow.contrib.linalg.python.ops.linear_operator_tril import * -from tensorflow.contrib.linalg.python.ops.linear_operator_udvh_update import * +from tensorflow.python.ops.linalg.linear_operator import * +from tensorflow.python.ops.linalg.linear_operator_composition import * +from tensorflow.python.ops.linalg.linear_operator_diag import * +from tensorflow.python.ops.linalg.linear_operator_full_matrix import * +from tensorflow.python.ops.linalg.linear_operator_identity import * +from tensorflow.python.ops.linalg.linear_operator_low_rank_update import * +from tensorflow.python.ops.linalg.linear_operator_lower_triangular import * # pylint: enable=unused-import,wildcard-import,line-too-long,g-importing-member diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py index 4746484755..6a72df6dfd 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py +++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_addition_test.py @@ -19,10 +19,10 @@ from __future__ import print_function 
import numpy as np -from tensorflow.contrib import linalg as linalg_lib from tensorflow.contrib.linalg.python.ops import linear_operator_addition from tensorflow.python.framework import random_seed from tensorflow.python.ops import linalg_ops +from tensorflow.python.ops.linalg import linalg as linalg_lib from tensorflow.python.platform import test linalg = linalg_lib @@ -114,7 +114,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase): def test_diag_tril_diag(self): op1 = linalg.LinearOperatorDiag( [1., 1.], is_non_singular=True, name="diag_a") - op2 = linalg.LinearOperatorTriL( + op2 = linalg.LinearOperatorLowerTriangular( [[2., 0.], [0., 2.]], is_self_adjoint=True, is_non_singular=True, @@ -125,7 +125,7 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase): op_sum = add_operators([op1, op2, op3]) self.assertEqual(1, len(op_sum)) op = op_sum[0] - self.assertTrue(isinstance(op, linalg_lib.LinearOperatorTriL)) + self.assertTrue(isinstance(op, linalg_lib.LinearOperatorLowerTriangular)) self.assertAllClose([[6., 0.], [0., 6.]], op.to_dense().eval()) # The diag operators will be self-adjoint (because real and diagonal). 
@@ -140,7 +140,8 @@ class LinearOperatorAdditionCorrectnessTest(test.TestCase): op0 = linalg.LinearOperatorFullMatrix( [[-1., -1.], [-1., -1.]], name="matrix") op1 = linalg.LinearOperatorDiag([1., 1.], name="diag_a") - op2 = linalg.LinearOperatorTriL([[2., 0.], [1.5, 2.]], name="tril") + op2 = linalg.LinearOperatorLowerTriangular( + [[2., 0.], [1.5, 2.]], name="tril") op3 = linalg.LinearOperatorDiag([3., 3.], name="diag_b") with self.test_session(): op_sum = add_operators([op0, op1, op2, op3], operator_name="my_operator") @@ -189,7 +190,7 @@ class LinearOperatorOrderOfAdditionTest(test.TestCase): def test_tier_1_additions_done_by_tier_1(self): diag1 = linalg.LinearOperatorDiag([1.]) diag2 = linalg.LinearOperatorDiag([1.]) - tril = linalg.LinearOperatorTriL([[1.]]) + tril = linalg.LinearOperatorLowerTriangular([[1.]]) addition_tiers = [ [linear_operator_addition._AddAndReturnDiag()], [linear_operator_addition._AddAndReturnTriL()], @@ -199,12 +200,12 @@ class LinearOperatorOrderOfAdditionTest(test.TestCase): # _BadAdder) was never reached. op_sum = add_operators([diag1, diag2, tril], addition_tiers=addition_tiers) self.assertEqual(1, len(op_sum)) - self.assertTrue(isinstance(op_sum[0], linalg.LinearOperatorTriL)) + self.assertTrue(isinstance(op_sum[0], linalg.LinearOperatorLowerTriangular)) def test_tier_1_additions_done_by_tier_1_with_order_flipped(self): diag1 = linalg.LinearOperatorDiag([1.]) diag2 = linalg.LinearOperatorDiag([1.]) - tril = linalg.LinearOperatorTriL([[1.]]) + tril = linalg.LinearOperatorLowerTriangular([[1.]]) addition_tiers = [ [linear_operator_addition._AddAndReturnTriL()], [linear_operator_addition._AddAndReturnDiag()], @@ -216,12 +217,12 @@ class LinearOperatorOrderOfAdditionTest(test.TestCase): # Tier 2 was never used (therefore, _BadAdder didn't raise). 
op_sum = add_operators([diag1, diag2, tril], addition_tiers=addition_tiers) self.assertEqual(1, len(op_sum)) - self.assertTrue(isinstance(op_sum[0], linalg.LinearOperatorTriL)) + self.assertTrue(isinstance(op_sum[0], linalg.LinearOperatorLowerTriangular)) def test_cannot_add_everything_so_return_more_than_one_operator(self): diag1 = linalg.LinearOperatorDiag([1.]) diag2 = linalg.LinearOperatorDiag([2.]) - tril5 = linalg.LinearOperatorTriL([[5.]]) + tril5 = linalg.LinearOperatorLowerTriangular([[5.]]) addition_tiers = [ [linear_operator_addition._AddAndReturnDiag()], ] @@ -237,7 +238,7 @@ class LinearOperatorOrderOfAdditionTest(test.TestCase): if isinstance(op, linalg.LinearOperatorDiag): found_diag = True self.assertAllClose([[3.]], op.to_dense().eval()) - if isinstance(op, linalg.LinearOperatorTriL): + if isinstance(op, linalg.LinearOperatorLowerTriangular): found_tril = True self.assertAllClose([[5.]], op.to_dense().eval()) self.assertTrue(found_diag and found_tril) @@ -245,7 +246,7 @@ class LinearOperatorOrderOfAdditionTest(test.TestCase): def test_intermediate_tier_is_not_skipped(self): diag1 = linalg.LinearOperatorDiag([1.]) diag2 = linalg.LinearOperatorDiag([1.]) - tril = linalg.LinearOperatorTriL([[1.]]) + tril = linalg.LinearOperatorLowerTriangular([[1.]]) addition_tiers = [ [linear_operator_addition._AddAndReturnDiag()], [_BadAdder()], @@ -369,14 +370,14 @@ class AddAndReturnTriLTest(test.TestCase): def test_diag_plus_tril(self): diag = linalg.LinearOperatorDiag([1., 2.]) - tril = linalg.LinearOperatorTriL([[10., 0.], [30., 0.]]) + tril = linalg.LinearOperatorLowerTriangular([[10., 0.], [30., 0.]]) hints = linear_operator_addition._Hints( is_positive_definite=True, is_non_singular=True) self.assertTrue(self._adder.can_add(diag, diag)) self.assertTrue(self._adder.can_add(diag, tril)) operator = self._adder.add(diag, tril, "my_operator", hints) - self.assertTrue(isinstance(operator, linalg.LinearOperatorTriL)) + self.assertTrue(isinstance(operator, 
linalg.LinearOperatorLowerTriangular)) with self.test_session(): self.assertAllClose([[11., 0.], [30., 2.]], operator.to_dense().eval()) diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_addition.py b/tensorflow/contrib/linalg/python/ops/linear_operator_addition.py index 16c4c6e6d6..86130a2c07 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_addition.py +++ b/tensorflow/contrib/linalg/python/ops/linear_operator_addition.py @@ -22,14 +22,14 @@ import abc import six -from tensorflow.contrib.linalg.python.ops import linear_operator -from tensorflow.contrib.linalg.python.ops import linear_operator_diag -from tensorflow.contrib.linalg.python.ops import linear_operator_full_matrix -from tensorflow.contrib.linalg.python.ops import linear_operator_identity -from tensorflow.contrib.linalg.python.ops import linear_operator_tril from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops +from tensorflow.python.ops.linalg import linear_operator +from tensorflow.python.ops.linalg import linear_operator_diag +from tensorflow.python.ops.linalg import linear_operator_full_matrix +from tensorflow.python.ops.linalg import linear_operator_identity +from tensorflow.python.ops.linalg import linear_operator_lower_triangular __all__ = [] @@ -347,7 +347,7 @@ class _AddAndReturnTriL(_Adder): else: op_add_to_tensor, op_other = op2, op1 - return linear_operator_tril.LinearOperatorTriL( + return linear_operator_lower_triangular.LinearOperatorLowerTriangular( tril=op_add_to_tensor.add_to_tensor(op_other.to_dense()), is_non_singular=hints.is_non_singular, is_self_adjoint=hints.is_self_adjoint, @@ -397,7 +397,8 @@ def _type(operator): """Returns the type name constant (e.g. 
_TRIL) for operator.""" if isinstance(operator, linear_operator_diag.LinearOperatorDiag): return _DIAG - if isinstance(operator, linear_operator_tril.LinearOperatorTriL): + if isinstance(operator, + linear_operator_lower_triangular.LinearOperatorLowerTriangular): return _TRIL if isinstance(operator, linear_operator_full_matrix.LinearOperatorFullMatrix): return _MATRIX diff --git a/tensorflow/docs_src/api_guides/python/contrib.linalg.md b/tensorflow/docs_src/api_guides/python/contrib.linalg.md index 5f1db6c6af..c0cb2b195c 100644 --- a/tensorflow/docs_src/api_guides/python/contrib.linalg.md +++ b/tensorflow/docs_src/api_guides/python/contrib.linalg.md @@ -22,8 +22,8 @@ Subclasses of `LinearOperator` provide a access to common methods on a * @{tf.contrib.linalg.LinearOperatorIdentity} * @{tf.contrib.linalg.LinearOperatorScaledIdentity} * @{tf.contrib.linalg.LinearOperatorFullMatrix} -* @{tf.contrib.linalg.LinearOperatorTriL} -* @{tf.contrib.linalg.LinearOperatorUDVHUpdate} +* @{tf.contrib.linalg.LinearOperatorLowerTriangular} +* @{tf.contrib.linalg.LinearOperatorLowRankUpdate} ### Transformations and Combinations of operators diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 1099611f37..b9b85909a3 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -70,7 +70,6 @@ py_library( ":io_ops", ":layers", ":lib", - ":linalg_ns", ":math_ops", ":metrics", ":nn", @@ -104,6 +103,7 @@ py_library( "//tensorflow/python/keras", "//tensorflow/python/ops/losses", "//tensorflow/python/ops/distributions", + "//tensorflow/python/ops/linalg", "//tensorflow/python/profiler", "//tensorflow/python/saved_model", ] + if_not_windows([ @@ -1710,21 +1710,6 @@ py_library( ], ) -py_library( - name = "linalg_ns", - srcs = [ - "ops/linalg_impl.py", - "ops/linalg_ns.py", - ], - srcs_version = "PY2AND3", - deps = [ - ":array_ops", - ":linalg_ops", - ":math_ops", - ":special_math_ops", - ], -) - py_library( name = "linalg_grad", srcs = ["ops/linalg_grad.py"], @@ 
-2223,6 +2208,7 @@ py_library( ":variable_scope", ":variables", "//tensorflow/python/ops/distributions", + "//tensorflow/python/ops/linalg", ], ) diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index f21f1f822c..8d9c5de9ad 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -73,7 +73,6 @@ from tensorflow.python.ops.standard_ops import * # Namespaces from tensorflow.python.ops import initializers_ns as initializers -from tensorflow.python.ops import linalg_ns as linalg # pylint: enable=wildcard-import @@ -90,6 +89,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import sets from tensorflow.python.ops import spectral_ops as spectral from tensorflow.python.ops.distributions import distributions +from tensorflow.python.ops.linalg import linalg from tensorflow.python.ops.losses import losses from tensorflow.python.profiler import profiler from tensorflow.python.saved_model import saved_model diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 206c6a5692..b8a7444f45 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1485,8 +1485,8 @@ cuda_py_test( "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:linalg_ops", - "//tensorflow/python:linalg_ns", "//tensorflow/python:math_ops", + "//tensorflow/python/ops/linalg", ], tags = ["no_windows_gpu"], ) diff --git a/tensorflow/python/kernel_tests/linalg/BUILD b/tensorflow/python/kernel_tests/linalg/BUILD new file mode 100644 index 0000000000..4e18eaa4e8 --- /dev/null +++ b/tensorflow/python/kernel_tests/linalg/BUILD @@ -0,0 +1,149 @@ +# Tests of TensorFlow kernels written using the Python API. 
+ +package( + default_visibility = ["//tensorflow:internal"], +) + +licenses(["notice"]) # Apache 2.0 + +load("//tensorflow:tensorflow.bzl", "cuda_py_test") + +cuda_py_test( + name = "linear_operator_test", + size = "small", + srcs = ["linear_operator_test.py"], + additional_deps = [ + "//tensorflow/python/ops/linalg", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + +cuda_py_test( + name = "linear_operator_composition_test", + size = "medium", + srcs = ["linear_operator_composition_test.py"], + additional_deps = [ + "//tensorflow/python/ops/linalg", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], + tags = ["noasan"], # times out b/63678675 +) + +cuda_py_test( + name = "linear_operator_diag_test", + size = "medium", + srcs = ["linear_operator_diag_test.py"], + additional_deps = [ + "//tensorflow/python/ops/linalg", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:linalg_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:random_ops", + ], +) + +cuda_py_test( + name = "linear_operator_identity_test", + size = "medium", + srcs = ["linear_operator_identity_test.py"], + additional_deps = [ + "//tensorflow/python/ops/linalg", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_test_lib", + 
"//tensorflow/python:linalg_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:random_ops", + ], +) + +cuda_py_test( + name = "linear_operator_full_matrix_test", + size = "medium", + srcs = ["linear_operator_full_matrix_test.py"], + additional_deps = [ + "//tensorflow/python/ops/linalg", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], +) + +cuda_py_test( + name = "linear_operator_lower_triangular_test", + size = "medium", + srcs = ["linear_operator_lower_triangular_test.py"], + additional_deps = [ + "//tensorflow/python/ops/linalg", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], +) + +cuda_py_test( + name = "linear_operator_low_rank_update_test", + size = "medium", + srcs = ["linear_operator_low_rank_update_test.py"], + additional_deps = [ + "//tensorflow/python/ops/linalg", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], + shard_count = 5, +) + +cuda_py_test( + name = "linear_operator_util_test", + size = "medium", + srcs = ["linear_operator_util_test.py"], + additional_deps = [ + "//tensorflow/python/ops/linalg", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + 
exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/python/kernel_tests/linalg/__init__.py b/tensorflow/python/kernel_tests/linalg/__init__.py new file mode 100644 index 0000000000..1f6cb4a020 --- /dev/null +++ b/tensorflow/python/kernel_tests/linalg/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Kernel tests for tf.linalg.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py similarity index 98% rename from tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py rename to tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py index e2a7f5fbe1..4d79365dbe 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_composition_test.py @@ -19,13 +19,13 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib import linalg as linalg_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_test_util from 
tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linalg as linalg_lib +from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test linalg = linalg_lib diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_diag_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py similarity index 97% rename from tensorflow/contrib/linalg/python/kernel_tests/linear_operator_diag_test.py rename to tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py index 397bfa2215..343d158498 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_diag_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py @@ -17,13 +17,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib import linalg as linalg_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_test_util from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops.linalg import linalg as linalg_lib +from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test linalg = linalg_lib diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_full_matrix_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py similarity index 98% rename from tensorflow/contrib/linalg/python/kernel_tests/linear_operator_full_matrix_test.py rename to 
tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py index 528bc3ed12..50d6f524e9 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_full_matrix_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_full_matrix_test.py @@ -19,13 +19,13 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib import linalg as linalg_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_test_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linalg as linalg_lib +from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test linalg = linalg_lib diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_identity_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py similarity index 99% rename from tensorflow/contrib/linalg/python/kernel_tests/linear_operator_identity_test.py rename to tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py index 5faf2c432b..6d63570768 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_identity_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_identity_test.py @@ -19,13 +19,13 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib import linalg as linalg_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_test_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops.linalg import linalg as linalg_lib +from 
tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_udvh_update_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py similarity index 88% rename from tensorflow/contrib/linalg/python/kernel_tests/linear_operator_udvh_update_test.py rename to tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py index f28213096b..d3a47da946 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_udvh_update_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_low_rank_update_test.py @@ -19,12 +19,12 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib import linalg as linalg_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_test_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linalg as linalg_lib +from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test linalg = linalg_lib @@ -32,7 +32,7 @@ random_seed.set_random_seed(23) rng = np.random.RandomState(0) -class BaseLinearOperatorUDVHUpdatetest(object): +class BaseLinearOperatorLowRankUpdatetest(object): """Base test for this type of operator.""" # Subclasses should set these attributes to either True or False. @@ -51,7 +51,7 @@ class BaseLinearOperatorUDVHUpdatetest(object): @property def _dtypes_to_test(self): # TODO(langmore) Test complex types once cholesky works with them. - # See comment in LinearOperatorUDVHUpdate.__init__. + # See comment in LinearOperatorLowRankUpdate.__init__. 
return [dtypes.float32, dtypes.float64] @property @@ -108,7 +108,7 @@ class BaseLinearOperatorUDVHUpdatetest(object): base_operator = linalg.LinearOperatorDiag( base_diag_ph, is_positive_definite=True) - operator = linalg.LinearOperatorUDVHUpdate( + operator = linalg.LinearOperatorLowRankUpdate( base_operator, u=u_ph, v=v_ph if self._use_v else None, @@ -122,7 +122,7 @@ class BaseLinearOperatorUDVHUpdatetest(object): else: base_operator = linalg.LinearOperatorDiag( base_diag, is_positive_definite=True) - operator = linalg.LinearOperatorUDVHUpdate( + operator = linalg.LinearOperatorLowRankUpdate( base_operator, u, v=v if self._use_v else None, @@ -164,8 +164,8 @@ class BaseLinearOperatorUDVHUpdatetest(object): return operator, mat, feed_dict -class LinearOperatorUDVHUpdatetestWithDiagUseCholesky( - BaseLinearOperatorUDVHUpdatetest, +class LinearOperatorLowRankUpdatetestWithDiagUseCholesky( + BaseLinearOperatorLowRankUpdatetest, linear_operator_test_util.SquareLinearOperatorDerivedClassTest): """A = L + UDU^H, D > 0, L > 0 ==> A > 0 and we can use a Cholesky.""" @@ -182,8 +182,8 @@ class LinearOperatorUDVHUpdatetestWithDiagUseCholesky( self._rtol[dtypes.float64] = 1e-10 -class LinearOperatorUDVHUpdatetestWithDiagCannotUseCholesky( - BaseLinearOperatorUDVHUpdatetest, +class LinearOperatorLowRankUpdatetestWithDiagCannotUseCholesky( + BaseLinearOperatorLowRankUpdatetest, linear_operator_test_util.SquareLinearOperatorDerivedClassTest): """A = L + UDU^H, D !> 0, L > 0 ==> A !> 0 and we cannot use a Cholesky.""" @@ -201,8 +201,8 @@ class LinearOperatorUDVHUpdatetestWithDiagCannotUseCholesky( self._rtol[dtypes.float64] = 1e-9 -class LinearOperatorUDVHUpdatetestNoDiagUseCholesky( - BaseLinearOperatorUDVHUpdatetest, +class LinearOperatorLowRankUpdatetestNoDiagUseCholesky( + BaseLinearOperatorLowRankUpdatetest, linear_operator_test_util.SquareLinearOperatorDerivedClassTest): """A = L + UU^H, L > 0 ==> A > 0 and we can use a Cholesky.""" @@ -219,8 +219,8 @@ class 
LinearOperatorUDVHUpdatetestNoDiagUseCholesky( self._rtol[dtypes.float64] = 1e-10 -class LinearOperatorUDVHUpdatetestNoDiagCannotUseCholesky( - BaseLinearOperatorUDVHUpdatetest, +class LinearOperatorLowRankUpdatetestNoDiagCannotUseCholesky( + BaseLinearOperatorLowRankUpdatetest, linear_operator_test_util.SquareLinearOperatorDerivedClassTest): """A = L + UV^H, L > 0 ==> A is not symmetric and we cannot use a Cholesky.""" @@ -238,8 +238,8 @@ class LinearOperatorUDVHUpdatetestNoDiagCannotUseCholesky( self._rtol[dtypes.float64] = 1e-9 -class LinearOperatorUDVHUpdatetestWithDiagNotSquare( - BaseLinearOperatorUDVHUpdatetest, +class LinearOperatorLowRankUpdatetestWithDiagNotSquare( + BaseLinearOperatorLowRankUpdatetest, linear_operator_test_util.NonSquareLinearOperatorDerivedClassTest): """A = L + UDU^H, D > 0, L > 0 ==> A > 0 and we can use a Cholesky.""" @@ -248,7 +248,7 @@ class LinearOperatorUDVHUpdatetestWithDiagNotSquare( _use_v = True -class LinearOpearatorUDVHUpdateBroadcastsShape(test.TestCase): +class LinearOpearatorLowRankUpdateBroadcastsShape(test.TestCase): """Test that the operator's shape is the broadcast of arguments.""" def test_static_shape_broadcasts_up_from_operator_to_other_args(self): @@ -256,8 +256,7 @@ class LinearOpearatorUDVHUpdateBroadcastsShape(test.TestCase): u = array_ops.ones(shape=[2, 3, 2]) diag = array_ops.ones(shape=[2, 2]) - operator = linalg.LinearOperatorUDVHUpdate( - base_operator, u, diag) + operator = linalg.LinearOperatorLowRankUpdate(base_operator, u, diag) # domain_dimension is 3 self.assertAllEqual([2, 3, 3], operator.shape) @@ -272,7 +271,7 @@ class LinearOpearatorUDVHUpdateBroadcastsShape(test.TestCase): u_shape_ph = array_ops.placeholder(dtypes.int32) u = array_ops.ones(shape=u_shape_ph) - operator = linalg.LinearOperatorUDVHUpdate(base_operator, u) + operator = linalg.LinearOperatorLowRankUpdate(base_operator, u) feed_dict = { num_rows_ph: 3, @@ -290,34 +289,34 @@ class 
LinearOpearatorUDVHUpdateBroadcastsShape(test.TestCase): u = rng.rand(5, 3, 2) v = rng.rand(4, 3, 2) with self.assertRaisesRegexp(ValueError, "Incompatible shapes"): - linalg.LinearOperatorUDVHUpdate(base_operator, u=u, v=v) + linalg.LinearOperatorLowRankUpdate(base_operator, u=u, v=v) def test_u_and_base_operator_incompatible_batch_shape_raises(self): base_operator = linalg.LinearOperatorIdentity( num_rows=3, batch_shape=[4], dtype=np.float64) u = rng.rand(5, 3, 2) with self.assertRaisesRegexp(ValueError, "Incompatible shapes"): - linalg.LinearOperatorUDVHUpdate(base_operator, u=u) + linalg.LinearOperatorLowRankUpdate(base_operator, u=u) def test_u_and_base_operator_incompatible_domain_dimension(self): base_operator = linalg.LinearOperatorIdentity(num_rows=3, dtype=np.float64) u = rng.rand(5, 4, 2) with self.assertRaisesRegexp(ValueError, "not compatible"): - linalg.LinearOperatorUDVHUpdate(base_operator, u=u) + linalg.LinearOperatorLowRankUpdate(base_operator, u=u) def test_u_and_diag_incompatible_low_rank_raises(self): base_operator = linalg.LinearOperatorIdentity(num_rows=3, dtype=np.float64) u = rng.rand(5, 3, 2) diag = rng.rand(5, 4) # Last dimension should be 2 with self.assertRaisesRegexp(ValueError, "not compatible"): - linalg.LinearOperatorUDVHUpdate(base_operator, u=u, diag_update=diag) + linalg.LinearOperatorLowRankUpdate(base_operator, u=u, diag_update=diag) def test_diag_incompatible_batch_shape_raises(self): base_operator = linalg.LinearOperatorIdentity(num_rows=3, dtype=np.float64) u = rng.rand(5, 3, 2) diag = rng.rand(4, 2) # First dimension should be 5 with self.assertRaisesRegexp(ValueError, "Incompatible shapes"): - linalg.LinearOperatorUDVHUpdate(base_operator, u=u, diag_update=diag) + linalg.LinearOperatorLowRankUpdate(base_operator, u=u, diag_update=diag) if __name__ == "__main__": diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_tril_test.py 
b/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py similarity index 86% rename from tensorflow/contrib/linalg/python/kernel_tests/linear_operator_tril_test.py rename to tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py index 9f5f2856f1..db3918f998 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_tril_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_lower_triangular_test.py @@ -17,18 +17,18 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib import linalg as linalg_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_test_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops +from tensorflow.python.ops.linalg import linalg as linalg_lib +from tensorflow.python.ops.linalg import linear_operator_test_util from tensorflow.python.platform import test linalg = linalg_lib random_seed.set_random_seed(23) -class LinearOperatorTriLTest( +class LinearOperatorLowerTriangularTest( linear_operator_test_util.SquareLinearOperatorDerivedClassTest): """Most tests done in the base class LinearOperatorDerivedClassTest.""" @@ -50,10 +50,10 @@ class LinearOperatorTriLTest( # tril is random and we want the same value used for both mat and # feed_dict. tril = tril.eval() - operator = linalg.LinearOperatorTriL(tril_ph) + operator = linalg.LinearOperatorLowerTriangular(tril_ph) feed_dict = {tril_ph: tril} else: - operator = linalg.LinearOperatorTriL(tril) + operator = linalg.LinearOperatorLowerTriangular(tril) feed_dict = None mat = array_ops.matrix_band_part(tril, -1, 0) @@ -64,14 +64,14 @@ class LinearOperatorTriLTest( # Singlular matrix with one positive eigenvalue and one zero eigenvalue. 
with self.test_session(): tril = [[1., 0.], [1., 0.]] - operator = linalg.LinearOperatorTriL(tril) + operator = linalg.LinearOperatorLowerTriangular(tril) with self.assertRaisesOpError("Singular operator"): operator.assert_non_singular().run() def test_is_x_flags(self): # Matrix with two positive eigenvalues. tril = [[1., 0.], [1., 1.]] - operator = linalg.LinearOperatorTriL( + operator = linalg.LinearOperatorLowerTriangular( tril, is_positive_definite=True, is_non_singular=True, @@ -82,7 +82,7 @@ class LinearOperatorTriLTest( def test_tril_must_have_at_least_two_dims_or_raises(self): with self.assertRaisesRegexp(ValueError, "at least 2 dimensions"): - linalg.LinearOperatorTriL([1.]) + linalg.LinearOperatorLowerTriangular([1.]) if __name__ == "__main__": diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_test.py similarity index 99% rename from tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py rename to tensorflow/python/kernel_tests/linalg/linear_operator_test.py index 78a4822c17..8e9f0150a2 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_test.py @@ -17,7 +17,7 @@ from __future__ import division from __future__ import print_function import numpy as np -from tensorflow.contrib import linalg as linalg_lib + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -25,6 +25,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linalg as linalg_lib from tensorflow.python.platform import test linalg = linalg_lib diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py 
b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py similarity index 98% rename from tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py rename to tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py index f047f4b978..ca3c8647db 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_util_test.py @@ -19,16 +19,14 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib import linalg as linalg_lib -from tensorflow.contrib.linalg.python.ops import linear_operator_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linear_operator_util from tensorflow.python.platform import test -linalg = linalg_lib random_seed.set_random_seed(23) rng = np.random.RandomState(0) diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py index be15e49f60..8bb583ce1b 100644 --- a/tensorflow/python/kernel_tests/linalg_ops_test.py +++ b/tensorflow/python/kernel_tests/linalg_ops_test.py @@ -22,9 +22,9 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops -from tensorflow.python.ops import linalg_ns as linalg from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linalg from tensorflow.python.platform import test diff --git a/tensorflow/python/ops/distributions/util.py b/tensorflow/python/ops/distributions/util.py index 089ec49f06..f261d996b5 100644 --- a/tensorflow/python/ops/distributions/util.py +++ b/tensorflow/python/ops/distributions/util.py @@ -523,8 +523,8 @@ def matrix_diag_transform(matrix, 
transform=None, name=None): # valid Cholesky factor. chol = matrix_diag_transform(matrix, transform=tf.nn.softplus) - # LinearOperatorTriL ignores the upper triangle. - operator = LinearOperatorTriL(chol) + # LinearOperatorLowerTriangular ignores the upper triangle. + operator = LinearOperatorLowerTriangular(chol) ``` Example of heteroskedastic 2-D linear regression. diff --git a/tensorflow/python/ops/linalg/BUILD b/tensorflow/python/ops/linalg/BUILD new file mode 100644 index 0000000000..a36e0a4be1 --- /dev/null +++ b/tensorflow/python/ops/linalg/BUILD @@ -0,0 +1,38 @@ +package( + default_visibility = ["//tensorflow:internal"], +) + +licenses(["notice"]) # Apache 2.0 + +py_library( + name = "linalg", + srcs = glob(["*.py"]), + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:nn", + "//tensorflow/python:nn_ops", + "//tensorflow/python:random_ops", + "//tensorflow/python:special_math_ops", + "//tensorflow/python:tensor_util", + "//tensorflow/python:util", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/python/ops/__init__.py b/tensorflow/python/ops/linalg/__init__.py similarity index 100% rename from tensorflow/python/ops/__init__.py rename to tensorflow/python/ops/linalg/__init__.py diff --git a/tensorflow/python/ops/linalg_ns.py b/tensorflow/python/ops/linalg/linalg.py similarity index 78% rename from tensorflow/python/ops/linalg_ns.py rename to tensorflow/python/ops/linalg/linalg.py index 92e488a6ce..02ceb65e2a 100644 --- a/tensorflow/python/ops/linalg_ns.py +++ b/tensorflow/python/ops/linalg/linalg.py @@ -12,10 +12,7 @@ # See the License for the 
specific language governing permissions and # limitations under the License. # ============================================================================== -"""Public API for tf.linalg namespace. - -@@logdet -""" +"""Public API for tf.linalg namespace.""" from __future__ import absolute_import from __future__ import division @@ -29,7 +26,14 @@ from tensorflow.python.ops import special_math_ops # go/tf-wildcard-import # pylint: disable=wildcard-import,unused-import -from tensorflow.python.ops.linalg_impl import * +from tensorflow.python.ops.linalg.linalg_impl import * +from tensorflow.python.ops.linalg.linear_operator import * +from tensorflow.python.ops.linalg.linear_operator_composition import * +from tensorflow.python.ops.linalg.linear_operator_diag import * +from tensorflow.python.ops.linalg.linear_operator_full_matrix import * +from tensorflow.python.ops.linalg.linear_operator_identity import * +from tensorflow.python.ops.linalg.linear_operator_low_rank_update import * +from tensorflow.python.ops.linalg.linear_operator_lower_triangular import * # pylint: enable=wildcard-import # Linear algebra ops. 
diff --git a/tensorflow/python/ops/linalg_impl.py b/tensorflow/python/ops/linalg/linalg_impl.py similarity index 100% rename from tensorflow/python/ops/linalg_impl.py rename to tensorflow/python/ops/linalg/linalg_impl.py diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator.py b/tensorflow/python/ops/linalg/linear_operator.py similarity index 99% rename from tensorflow/contrib/linalg/python/ops/linear_operator.py rename to tensorflow/python/ops/linalg/linear_operator.py index 91c0938e39..17c338ec75 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator.py +++ b/tensorflow/python/ops/linalg/linear_operator.py @@ -23,13 +23,13 @@ import contextlib import numpy as np -from tensorflow.contrib import framework as contrib_framework -from tensorflow.contrib.linalg.python.ops import linear_operator_util from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linear_operator_util from tensorflow.python.platform import tf_logging as logging __all__ = ["LinearOperator"] @@ -192,7 +192,7 @@ class LinearOperator(object): graph_parents = [] if graph_parents is None else graph_parents for i, t in enumerate(graph_parents): - if t is None or not contrib_framework.is_tensor(t): + if t is None or not tensor_util.is_tensor(t): raise ValueError("Graph parent item %d is not a Tensor; %s." 
% (i, t)) self._dtype = dtype self._graph_parents = graph_parents diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_composition.py b/tensorflow/python/ops/linalg/linear_operator_composition.py similarity index 99% rename from tensorflow/contrib/linalg/python/ops/linear_operator_composition.py rename to tensorflow/python/ops/linalg/linear_operator_composition.py index 0a71a73a9c..14411291d4 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_composition.py +++ b/tensorflow/python/ops/linalg/linear_operator_composition.py @@ -18,13 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.linalg.python.ops import linear_operator from tensorflow.python.framework import common_shapes from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops +from tensorflow.python.ops.linalg import linear_operator __all__ = ["LinearOperatorComposition"] diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_diag.py b/tensorflow/python/ops/linalg/linear_operator_diag.py similarity index 98% rename from tensorflow/contrib/linalg/python/ops/linear_operator_diag.py rename to tensorflow/python/ops/linalg/linear_operator_diag.py index 29184483bf..e1558a351d 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_diag.py +++ b/tensorflow/python/ops/linalg/linear_operator_diag.py @@ -18,13 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.linalg.python.ops import linear_operator -from tensorflow.contrib.linalg.python.ops import linear_operator_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from 
tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linear_operator +from tensorflow.python.ops.linalg import linear_operator_util __all__ = ["LinearOperatorDiag",] diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_full_matrix.py b/tensorflow/python/ops/linalg/linear_operator_full_matrix.py similarity index 98% rename from tensorflow/contrib/linalg/python/ops/linear_operator_full_matrix.py rename to tensorflow/python/ops/linalg/linear_operator_full_matrix.py index 52b40eaf8d..dd4c7cb041 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_full_matrix.py +++ b/tensorflow/python/ops/linalg/linear_operator_full_matrix.py @@ -18,11 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.linalg.python.ops import linear_operator from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linear_operator __all__ = ["LinearOperatorFullMatrix"] diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_identity.py b/tensorflow/python/ops/linalg/linear_operator_identity.py similarity index 99% rename from tensorflow/contrib/linalg/python/ops/linear_operator_identity.py rename to tensorflow/python/ops/linalg/linear_operator_identity.py index b9ac90ff33..18bd2f9f6d 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_identity.py +++ b/tensorflow/python/ops/linalg/linear_operator_identity.py @@ -20,8 +20,6 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib.linalg.python.ops import linear_operator -from tensorflow.contrib.linalg.python.ops import linear_operator_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from 
tensorflow.python.framework import tensor_shape @@ -30,6 +28,8 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linear_operator +from tensorflow.python.ops.linalg import linear_operator_util __all__ = [ "LinearOperatorIdentity", diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_udvh_update.py b/tensorflow/python/ops/linalg/linear_operator_low_rank_update.py similarity index 95% rename from tensorflow/contrib/linalg/python/ops/linear_operator_udvh_update.py rename to tensorflow/python/ops/linalg/linear_operator_low_rank_update.py index 9c9c359574..ad3bb2efa9 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_udvh_update.py +++ b/tensorflow/python/ops/linalg/linear_operator_low_rank_update.py @@ -18,20 +18,22 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.framework.python.framework import tensor_util as contrib_tensor_util -from tensorflow.contrib.linalg.python.ops import linear_operator -from tensorflow.contrib.linalg.python.ops import linear_operator_diag -from tensorflow.contrib.linalg.python.ops import linear_operator_identity from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linear_operator +from tensorflow.python.ops.linalg import linear_operator_diag +from tensorflow.python.ops.linalg import linear_operator_identity -__all__ = ["LinearOperatorUDVHUpdate",] +__all__ = [ + "LinearOperatorLowRankUpdate", +] -class LinearOperatorUDVHUpdate(linear_operator.LinearOperator): +class 
LinearOperatorLowRankUpdate(linear_operator.LinearOperator): """Perturb a `LinearOperator` with a rank `K` update. This operator acts like a [batch] matrix `A` with shape @@ -39,7 +41,7 @@ class LinearOperatorUDVHUpdate(linear_operator.LinearOperator): batch member. For every batch index `(i1,...,ib)`, `A[i1,...,ib, : :]` is an `M x N` matrix. - `LinearOperatorUDVHUpdate` represents `A = L + U D V^H`, where + `LinearOperatorLowRankUpdate` represents `A = L + U D V^H`, where ``` L, is a LinearOperator representing [batch] M x N matrices @@ -65,7 +67,7 @@ class LinearOperatorUDVHUpdate(linear_operator.LinearOperator): is_positive_definite=True) # Perturb with a rank 2 perturbation - operator = LinearOperatorUDVHUpdate( + operator = LinearOperatorLowRankUpdate( operator=diag_operator, u=[[1., 2.], [-1., 3.], [0., 0.]], diag_update=[11., 12.], @@ -94,7 +96,7 @@ class LinearOperatorUDVHUpdate(linear_operator.LinearOperator): ### Performance - Suppose `operator` is a `LinearOperatorUDVHUpdate` of shape `[M, N]`, + Suppose `operator` is a `LinearOperatorLowRankUpdate` of shape `[M, N]`, made from a rank `K` update of `base_operator` which performs `.matmul(x)` on `x` having `x.shape = [N, R]` with `O(L_matmul*N*R)` complexity (and similarly for `solve`, `determinant`. Then, if `x.shape = [N, R]`, @@ -134,8 +136,8 @@ class LinearOperatorUDVHUpdate(linear_operator.LinearOperator): is_self_adjoint=None, is_positive_definite=None, is_square=None, - name="LinearOperatorUDVHUpdate"): - """Initialize a `LinearOperatorUDVHUpdate`. + name="LinearOperatorLowRankUpdate"): + """Initialize a `LinearOperatorLowRankUpdate`. 
This creates a `LinearOperator` of the form `A = L + U D V^H`, with `L` a `LinearOperator`, `U, V` both [batch] matrices, and `D` a [batch] @@ -249,7 +251,7 @@ class LinearOperatorUDVHUpdate(linear_operator.LinearOperator): self.u, self._diag_update, self.v] graph_parents = [p for p in graph_parents if p is not None] - super(LinearOperatorUDVHUpdate, self).__init__( + super(LinearOperatorLowRankUpdate, self).__init__( dtype=self._base_operator.dtype, graph_parents=graph_parents, is_non_singular=is_non_singular, @@ -262,8 +264,8 @@ class LinearOperatorUDVHUpdate(linear_operator.LinearOperator): self._set_diag_operators(diag_update, is_diag_update_positive) self._is_diag_update_positive = is_diag_update_positive - contrib_tensor_util.assert_same_float_dtype( - (base_operator, self.u, self.v, self._diag_update)) + check_ops.assert_same_float_dtype((base_operator, self.u, self.v, + self._diag_update)) self._check_shapes() # Pre-compute the so-called "capacitance" matrix diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_tril.py b/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py similarity index 90% rename from tensorflow/contrib/linalg/python/ops/linear_operator_tril.py rename to tensorflow/python/ops/linalg/linear_operator_lower_triangular.py index 22ccf6f131..4b074f5cec 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_tril.py +++ b/tensorflow/python/ops/linalg/linear_operator_lower_triangular.py @@ -18,18 +18,20 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.linalg.python.ops import linear_operator -from tensorflow.contrib.linalg.python.ops import linear_operator_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import 
linear_operator +from tensorflow.python.ops.linalg import linear_operator_util -__all__ = ["LinearOperatorTriL",] +__all__ = [ + "LinearOperatorLowerTriangular", +] -class LinearOperatorTriL(linear_operator.LinearOperator): +class LinearOperatorLowerTriangular(linear_operator.LinearOperator): """`LinearOperator` acting like a [batch] square lower triangular matrix. This operator acts like a [batch] lower triangular matrix `A` with shape @@ -37,13 +39,14 @@ class LinearOperatorTriL(linear_operator.LinearOperator): batch member. For every batch index `(i1,...,ib)`, `A[i1,...,ib, : :]` is an `N x N` matrix. - `LinearOperatorTriL` is initialized with a `Tensor` having dimensions - `[B1,...,Bb, N, N]`. The upper triangle of the last two dimensions is ignored. + `LinearOperatorLowerTriangular` is initialized with a `Tensor` having + dimensions `[B1,...,Bb, N, N]`. The upper triangle of the last two + dimensions is ignored. ```python # Create a 2 x 2 lower-triangular linear operator. tril = [[1., 2.], [3., 4.]] - operator = LinearOperatorTriL(tril) + operator = LinearOperatorLowerTriangular(tril) # The upper triangle is ignored. operator.to_dense() @@ -62,7 +65,7 @@ class LinearOperatorTriL(linear_operator.LinearOperator): # Create a [2, 3] batch of 4 x 4 linear operators. tril = tf.random_normal(shape=[2, 3, 4, 4]) - operator = LinearOperatorTriL(tril) + operator = LinearOperatorLowerTriangular(tril) ``` #### Shape compatibility @@ -77,7 +80,7 @@ class LinearOperatorTriL(linear_operator.LinearOperator): #### Performance - Suppose `operator` is a `LinearOperatorTriL` of shape `[N, N]`, + Suppose `operator` is a `LinearOperatorLowerTriangular` of shape `[N, N]`, and `x.shape = [N, R]`. Then * `operator.matmul(x)` involves `N^2 * R` multiplications. @@ -108,8 +111,8 @@ class LinearOperatorTriL(linear_operator.LinearOperator): is_self_adjoint=None, is_positive_definite=None, is_square=None, - name="LinearOperatorTriL"): - r"""Initialize a `LinearOperatorTriL`. 
+ name="LinearOperatorLowerTriangular"): + r"""Initialize a `LinearOperatorLowerTriangular`. Args: tril: Shape `[B1,...,Bb, N, N]` with `b >= 0`, `N >= 0`. @@ -147,7 +150,7 @@ class LinearOperatorTriL(linear_operator.LinearOperator): self._tril = array_ops.matrix_band_part(tril, -1, 0) self._diag = array_ops.matrix_diag_part(self._tril) - super(LinearOperatorTriL, self).__init__( + super(LinearOperatorLowerTriangular, self).__init__( dtype=self._tril.dtype, graph_parents=[self._tril], is_non_singular=is_non_singular, diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py b/tensorflow/python/ops/linalg/linear_operator_test_util.py similarity index 99% rename from tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py rename to tensorflow/python/ops/linalg/linear_operator_test_util.py index af14f34600..b86cb6d84d 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py +++ b/tensorflow/python/ops/linalg/linear_operator_test_util.py @@ -22,16 +22,16 @@ import abc import numpy as np import six -from tensorflow.contrib.framework.python.framework import tensor_util as contrib_tensor_util -from tensorflow.contrib.linalg.python.ops import linear_operator_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops.linalg import linear_operator_util from tensorflow.python.platform import test @@ -428,7 +428,7 @@ def random_positive_definite_matrix(shape, dtype, force_well_conditioned=False): `Tensor` with desired shape and dtype. 
""" dtype = dtypes.as_dtype(dtype) - if not contrib_tensor_util.is_tensor(shape): + if not tensor_util.is_tensor(shape): shape = tensor_shape.TensorShape(shape) # Matrix must be square. shape[-1].assert_is_compatible_with(shape[-2]) diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_util.py b/tensorflow/python/ops/linalg/linear_operator_util.py similarity index 100% rename from tensorflow/contrib/linalg/python/ops/linear_operator_util.py rename to tensorflow/python/ops/linalg/linear_operator_util.py diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-composition.__metaclass__.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-composition.__metaclass__.pbtxt new file mode 100644 index 0000000000..1adbcb41ad --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-composition.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperatorComposition.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-composition.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-composition.pbtxt new file mode 100644 index 0000000000..42d22bce42 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-composition.pbtxt @@ -0,0 +1,134 @@ +path: "tensorflow.linalg.LinearOperatorComposition" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "batch_shape" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + 
name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "operators" + mtype: "" + } + member { + name: "range_dimension" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "tensor_rank" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'operators\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: "assert_self_adjoint" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: 
"matmul" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } + member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'tensor_rank_tensor\'], " + } + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + member_method { + name: "trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-diag.__metaclass__.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-diag.__metaclass__.pbtxt new file mode 100644 index 0000000000..023d90ccdb --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-diag.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperatorDiag.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: 
"args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-diag.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-diag.pbtxt new file mode 100644 index 0000000000..d6749fdcec --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-diag.pbtxt @@ -0,0 +1,134 @@ +path: "tensorflow.linalg.LinearOperatorDiag" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "batch_shape" + mtype: "" + } + member { + name: "diag" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "range_dimension" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "tensor_rank" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'diag\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'LinearOperatorDiag\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: "assert_self_adjoint" + argspec: "args=[\'self\', 
\'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: "matmul" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } + member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'tensor_rank_tensor\'], " + 
} + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + member_method { + name: "trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-full-matrix.__metaclass__.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-full-matrix.__metaclass__.pbtxt new file mode 100644 index 0000000000..381072e76c --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-full-matrix.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperatorFullMatrix.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-full-matrix.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-full-matrix.pbtxt new file mode 100644 index 0000000000..d9f363d133 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-full-matrix.pbtxt @@ -0,0 +1,130 @@ +path: "tensorflow.linalg.LinearOperatorFullMatrix" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "batch_shape" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "range_dimension" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: 
"tensor_rank" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'matrix\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'LinearOperatorFullMatrix\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: "assert_self_adjoint" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: "matmul" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, 
keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } + member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'tensor_rank_tensor\'], " + } + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + member_method { + name: "trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-identity.__metaclass__.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-identity.__metaclass__.pbtxt new file mode 100644 index 0000000000..5d115b35fb --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-identity.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperatorIdentity.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-identity.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-identity.pbtxt new 
file mode 100644 index 0000000000..aac7ee31ed --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-identity.pbtxt @@ -0,0 +1,131 @@ +path: "tensorflow.linalg.LinearOperatorIdentity" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "batch_shape" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "range_dimension" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "tensor_rank" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'num_rows\', \'batch_shape\', \'dtype\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', \'assert_proper_shapes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\', \'True\', \'True\', \'True\', \'False\', \'LinearOperatorIdentity\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'mat\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: "assert_self_adjoint" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: "args=[\'self\', \'name\'], 
varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: "matmul" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } + member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'tensor_rank_tensor\'], " + } + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + member_method { + name: 
"trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-low-rank-update.__metaclass__.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-low-rank-update.__metaclass__.pbtxt new file mode 100644 index 0000000000..1f0d33298a --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-low-rank-update.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperatorLowRankUpdate.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-low-rank-update.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-low-rank-update.pbtxt new file mode 100644 index 0000000000..3ee800269e --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-low-rank-update.pbtxt @@ -0,0 +1,154 @@ +path: "tensorflow.linalg.LinearOperatorLowRankUpdate" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "base_operator" + mtype: "" + } + member { + name: "batch_shape" + mtype: "" + } + member { + name: "diag_operator" + mtype: "" + } + member { + name: "diag_update" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_diag_update_positive" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "range_dimension" + mtype: 
"" + } + member { + name: "shape" + mtype: "" + } + member { + name: "tensor_rank" + mtype: "" + } + member { + name: "u" + mtype: "" + } + member { + name: "v" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'base_operator\', \'u\', \'diag_update\', \'v\', \'is_diag_update_positive\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'LinearOperatorLowRankUpdate\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: "assert_self_adjoint" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: "matmul" + argspec: 
"args=[\'self\', \'x\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } + member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'tensor_rank_tensor\'], " + } + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + member_method { + name: "trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-lower-triangular.__metaclass__.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-lower-triangular.__metaclass__.pbtxt new file mode 100644 index 0000000000..2683430f4f --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-lower-triangular.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperatorLowerTriangular.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + 
name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-lower-triangular.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-lower-triangular.pbtxt new file mode 100644 index 0000000000..63a1bc2321 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-lower-triangular.pbtxt @@ -0,0 +1,130 @@ +path: "tensorflow.linalg.LinearOperatorLowerTriangular" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "batch_shape" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "range_dimension" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "tensor_rank" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'tril\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'LinearOperatorLowerTriangular\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: 
"assert_self_adjoint" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: "matmul" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } + member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, 
keywords=None, defaults=[\'tensor_rank_tensor\'], " + } + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + member_method { + name: "trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-scaled-identity.__metaclass__.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-scaled-identity.__metaclass__.pbtxt new file mode 100644 index 0000000000..38bf7ad586 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-scaled-identity.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperatorScaledIdentity.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-scaled-identity.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-scaled-identity.pbtxt new file mode 100644 index 0000000000..e2c5a505a7 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator-scaled-identity.pbtxt @@ -0,0 +1,135 @@ +path: "tensorflow.linalg.LinearOperatorScaledIdentity" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "batch_shape" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + mtype: "" + } + member { + name: "multiplier" + mtype: "" + } + member { + 
name: "name" + mtype: "" + } + member { + name: "range_dimension" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "tensor_rank" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'num_rows\', \'multiplier\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', \'assert_proper_shapes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'False\', \'LinearOperatorScaledIdentity\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'mat\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: "assert_self_adjoint" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: "matmul" + argspec: "args=[\'self\', \'x\', \'adjoint\', 
\'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } + member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'tensor_rank_tensor\'], " + } + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + member_method { + name: "trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator.__metaclass__.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator.__metaclass__.pbtxt new file mode 100644 index 0000000000..38da809b36 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator.__metaclass__.pbtxt @@ -0,0 +1,14 @@ +path: "tensorflow.linalg.LinearOperator.__metaclass__" +tf_class { + is_instance: "" + member_method { + name: "__init__" + } + member_method { + name: "mro" + } + member_method { + name: "register" + argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" + 
} +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator.pbtxt new file mode 100644 index 0000000000..6d849dc040 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.linalg.-linear-operator.pbtxt @@ -0,0 +1,129 @@ +path: "tensorflow.linalg.LinearOperator" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "batch_shape" + mtype: "" + } + member { + name: "domain_dimension" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "graph_parents" + mtype: "" + } + member { + name: "is_non_singular" + mtype: "" + } + member { + name: "is_positive_definite" + mtype: "" + } + member { + name: "is_self_adjoint" + mtype: "" + } + member { + name: "is_square" + mtype: "" + } + member { + name: "name" + mtype: "" + } + member { + name: "range_dimension" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "tensor_rank" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'dtype\', \'graph_parents\', \'is_non_singular\', \'is_self_adjoint\', \'is_positive_definite\', \'is_square\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "add_to_tensor" + argspec: "args=[\'self\', \'x\', \'name\'], varargs=None, keywords=None, defaults=[\'add_to_tensor\'], " + } + member_method { + name: "assert_non_singular" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_non_singular\'], " + } + member_method { + name: "assert_positive_definite" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_positive_definite\'], " + } + member_method { + name: "assert_self_adjoint" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'assert_self_adjoint\'], " + } + member_method { + name: "batch_shape_tensor" + argspec: 
"args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'batch_shape_tensor\'], " + } + member_method { + name: "determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'det\'], " + } + member_method { + name: "diag_part" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'diag_part\'], " + } + member_method { + name: "domain_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'domain_dimension_tensor\'], " + } + member_method { + name: "log_abs_determinant" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'log_abs_det\'], " + } + member_method { + name: "matmul" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'matmul\'], " + } + member_method { + name: "matvec" + argspec: "args=[\'self\', \'x\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'matvec\'], " + } + member_method { + name: "range_dimension_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'range_dimension_tensor\'], " + } + member_method { + name: "shape_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'shape_tensor\'], " + } + member_method { + name: "solve" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'adjoint_arg\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'solve\'], " + } + member_method { + name: "solvevec" + argspec: "args=[\'self\', \'rhs\', \'adjoint\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'solve\'], " + } + member_method { + name: "tensor_rank_tensor" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'tensor_rank_tensor\'], " + } + member_method { + name: "to_dense" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'to_dense\'], " + } + 
member_method { + name: "trace" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'trace\'], " + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt index 51b409bf80..4c94863caa 100644 --- a/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.linalg.pbtxt @@ -1,5 +1,37 @@ path: "tensorflow.linalg" tf_module { + member { + name: "LinearOperator" + mtype: "" + } + member { + name: "LinearOperatorComposition" + mtype: "" + } + member { + name: "LinearOperatorDiag" + mtype: "" + } + member { + name: "LinearOperatorFullMatrix" + mtype: "" + } + member { + name: "LinearOperatorIdentity" + mtype: "" + } + member { + name: "LinearOperatorLowRankUpdate" + mtype: "" + } + member { + name: "LinearOperatorLowerTriangular" + mtype: "" + } + member { + name: "LinearOperatorScaledIdentity" + mtype: "" + } member_method { name: "band_part" argspec: "args=[\'input\', \'num_lower\', \'num_upper\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From e540a893f14d9b0beea9161962694bf7d139caf3 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 10 Oct 2017 14:32:02 -0700 Subject: [PATCH 246/909] [XLA] Fix setting of changed_ in AlgebraicSimplifier. Due to this bug, sometimes AlgebraicSimplifier would make a change but say that it didn't. This would cause us to run the HLO simplification pipeline fewer times than we should. 
PiperOrigin-RevId: 171735154 --- .../xla/service/algebraic_simplifier.cc | 10 ++-- .../xla/service/algebraic_simplifier_test.cc | 48 +++++++++++++++++++ 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index dd97f3d876..a197a2accc 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -912,9 +912,10 @@ Status AlgebraicSimplifierVisitor::HandleBroadcast(HloInstruction* broadcast) { // A Broadcast that feeds a unary element-wise operation can sink the // broadcast after the unary element-wise operation. TF_ASSIGN_OR_RETURN( - changed_, + bool sink_succeeded, TryToSinkReshapeOrBroadcastAfterOpWithUniqueNonScalarOperand(broadcast)); - if (changed_) { + changed_ |= sink_succeeded; + if (sink_succeeded) { return Status::OK(); } @@ -1217,9 +1218,10 @@ Status AlgebraicSimplifierVisitor::HandleReshape(HloInstruction* reshape) { // A Reshape that feeds a unary element-wise operation can sink the // reshape after the unary element-wise operation. TF_ASSIGN_OR_RETURN( - changed_, + bool sink_succeeded, TryToSinkReshapeOrBroadcastAfterOpWithUniqueNonScalarOperand(reshape)); - if (changed_) { + changed_ |= sink_succeeded; + if (sink_succeeded) { return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index cf97a261da..52231b53d4 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -1077,6 +1077,54 @@ TEST_F(AlgebraicSimplifierTest, ReshapeToScalarNotHoistedAfterEffectiveUnary) { op::Maximum(op::Reshape(param), zero)); } +// Regression test for a bug where if we failed to sink a reshape, we'd set the +// 'changed' bit in AlgebraicSimplifier to false. 
+TEST_F(AlgebraicSimplifierTest, FailureToSinkReshapeDoesntAffectChangedBit) { + HloComputation::Builder builder(TestName()); + + // This add (param0 + 0) can be simplified. + Shape shape = ShapeUtil::MakeShape(F32, {2, 2}); + HloInstruction* add = builder.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kAdd, + builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param0")), + builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR2<float>({{0, 0}, {0, 0}}))))); + + builder.AddInstruction( + HloInstruction::CreateReshape(ShapeUtil::MakeShape(F32, {4}), add)); + + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + bitcasting_callback()); + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie()); +} + +// Regression test for a bug where if we failed to sink a reshape, we'd set the +// 'changed' bit in AlgebraicSimplifier to false. +TEST_F(AlgebraicSimplifierTest, FailureToSinkBroadcastDoesntAffectChangedBit) { + HloComputation::Builder builder(TestName()); + + // This add (param0 + 0) can be simplified.
+ Shape shape = ShapeUtil::MakeShape(F32, {2, 2}); + HloInstruction* add = builder.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kAdd, + builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param0")), + builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR2({{0, 0}, {0, 0}}))))); + + builder.AddInstruction(HloInstruction::CreateBroadcast( + ShapeUtil::MakeShape(F32, {2, 2, 2}), add, /*broadcast_dimensions=*/{0})); + + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + bitcasting_callback()); + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie()); +} + TEST_F(AlgebraicSimplifierTest, TransposeEqualsBitcast1) { HloComputation::Builder builder(TestName()); HloInstruction* param = -- GitLab From e3be40d099e1c5da869b7dfaf8d5891a8c2af312 Mon Sep 17 00:00:00 2001 From: "Jeffrey A. Dean" Date: Tue, 10 Oct 2017 15:36:59 -0700 Subject: [PATCH 247/909] Slightly rework tf.matmul to be more efficient (important for eager mode) PiperOrigin-RevId: 171745141 --- tensorflow/python/ops/math_ops.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 9383d72f14..b572377e2f 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1870,11 +1870,12 @@ def matmul(a, b = conj(b) transpose_b = True - sparse_matmul_types = [dtypes.bfloat16, dtypes.float32] - use_sparse_matmul = (a.dtype in sparse_matmul_types and - b.dtype in sparse_matmul_types and - (a_is_sparse or b_is_sparse)) - if dtypes.bfloat16 in (a.dtype, b.dtype): + use_sparse_matmul = False + if a_is_sparse or b_is_sparse: + sparse_matmul_types = [dtypes.bfloat16, dtypes.float32] + use_sparse_matmul = (a.dtype in sparse_matmul_types and + b.dtype in sparse_matmul_types) + if a.dtype == dtypes.bfloat16 or b.dtype == dtypes.bfloat16: # matmul 
currently doesn't handle bfloat16 inputs. use_sparse_matmul = True if use_sparse_matmul: -- GitLab From abf9e8cd35e9e83371f3c3ec8e08a8a2d933c82b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 16:42:50 -0700 Subject: [PATCH 248/909] BUILD cleanup PiperOrigin-RevId: 171753811 --- tensorflow/contrib/boosted_trees/lib/BUILD | 33 ++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/lib/BUILD b/tensorflow/contrib/boosted_trees/lib/BUILD index d4d405c3a9..9b3ffa98e3 100644 --- a/tensorflow/contrib/boosted_trees/lib/BUILD +++ b/tensorflow/contrib/boosted_trees/lib/BUILD @@ -262,6 +262,8 @@ py_library( srcs = ["learner/batch/base_split_handler.py"], srcs_version = "PY2AND3", deps = [ + "//tensorflow/contrib/boosted_trees:batch_ops_utils_py", + "//tensorflow/python:control_flow_ops", ], ) @@ -271,9 +273,13 @@ py_library( srcs_version = "PY2AND3", deps = [ ":base_split_handler", - "//tensorflow/contrib/boosted_trees:quantile_ops_py", "//tensorflow/contrib/boosted_trees:split_handler_ops_py", "//tensorflow/contrib/boosted_trees:stats_accumulator_ops_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:math_ops", ], ) @@ -285,7 +291,15 @@ py_test( ":categorical_split_handler", "//tensorflow/contrib/boosted_trees/proto:learner_proto_py", "//tensorflow/contrib/boosted_trees/proto:split_info_proto_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + "//tensorflow/python:resources", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", ], ) @@ -298,7 +312,14 @@ py_library( "//tensorflow/contrib/boosted_trees:quantile_ops_py", 
"//tensorflow/contrib/boosted_trees:split_handler_ops_py", "//tensorflow/contrib/boosted_trees:stats_accumulator_ops_py", - "//tensorflow/contrib/boosted_trees/proto:quantiles_proto_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:function", + "//tensorflow/python:math_ops", + "//tensorflow/python:sparse_tensor", ], ) @@ -310,7 +331,15 @@ py_test( ":ordinal_split_handler", "//tensorflow/contrib/boosted_trees/proto:learner_proto_py", "//tensorflow/contrib/boosted_trees/proto:split_info_proto_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + "//tensorflow/python:resources", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", ], ) -- GitLab From 010506f4feb93ff210fe92d5b48b8b6da56fea9b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 17:01:56 -0700 Subject: [PATCH 249/909] Fix docstring typos in tf.distributions.bijectors.Bijector. PiperOrigin-RevId: 171756150 --- tensorflow/python/ops/distributions/bijector_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/distributions/bijector_impl.py b/tensorflow/python/ops/distributions/bijector_impl.py index 1f07b0c91d..8f6d18d91a 100644 --- a/tensorflow/python/ops/distributions/bijector_impl.py +++ b/tensorflow/python/ops/distributions/bijector_impl.py @@ -158,7 +158,7 @@ class Bijector(object): # Evaluate forward transformation. fwd_x = my_bijector.forward(x) x == my_bijector.inverse(fwd_x) - x != my_bijector.forward(fwd_x) # Not equal because g(x) != g(g(x)). + x != my_bijector.forward(fwd_x) # Not equal because x != g(g(x)). 
``` - Computing a log-likelihood: @@ -275,7 +275,7 @@ class Bijector(object): implies `g^{-1}` is differentiable in the image of `g`. Applying the chain rule to `y = g(x) = g(g^{-1}(y))` yields `I = g'(g^{-1}(y))*g^{-1}'(y)`. - The same theorem also implies `g{-1}'` is non-singular therefore: + The same theorem also implies `g^{-1}'` is non-singular therefore: `inv[ g'(g^{-1}(y)) ] = g^{-1}'(y)`. The claim follows from [properties of determinant]( https://en.wikipedia.org/wiki/Determinant#Multiplicativity_and_matrix_groups). -- GitLab From 36019666303cd474f5afd0235272c004536fb810 Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Tue, 10 Oct 2017 17:22:48 -0700 Subject: [PATCH 250/909] Add an option to apply ModelPruner when building a grappler item and an option to provide specific feed nodes to the item builder. PiperOrigin-RevId: 171758733 --- tensorflow/core/grappler/BUILD | 1 + .../core/grappler/grappler_item_builder.cc | 53 ++++++++++++++++++- .../core/grappler/grappler_item_builder.h | 5 ++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD index 3f2cd2ddbf..678f8da298 100644 --- a/tensorflow/core/grappler/BUILD +++ b/tensorflow/core/grappler/BUILD @@ -100,6 +100,7 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler/inputs:utils", + "//tensorflow/core/grappler/optimizers:model_pruner", ], ) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index d23facf81a..54d60cd7aa 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -36,6 +36,7 @@ limitations under the License. 
#include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/grappler/inputs/utils.h" #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/model_pruner.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/protobuf_internal.h" @@ -133,12 +134,24 @@ Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def, ConvertGraphDefToGraph(graph_ctor_opts, graph_def, graphptr.get())); // Optimize the graph. - GraphOptimizer optimizer(*optimizer_opts); + ::tensorflow::GraphOptimizer optimizer(*optimizer_opts); optimizer.Optimize(flr, env, devices[0], &graphptr, /*shape_map=*/nullptr); graphptr->ToGraphDef(output_graph_def); return Status::OK(); } + +// Applies the same graph pruning logic to the graph as Session.Run in TF. +// If the returned status is not OK, item state may be inconsistent. +Status PruneGraph(GrapplerItem* item) { + ModelPruner pruner; + GraphDef pruned_graph; + Cluster* cluster = nullptr; // ModelPruner doesn't check cluster. + TF_RETURN_IF_ERROR(pruner.Optimize(cluster, *item, &pruned_graph)); + item->graph = std::move(pruned_graph); + return Status::OK(); +} + } // namespace // static @@ -152,6 +165,18 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( new_item->id = id; new_item->graph = meta_graph.graph_def(); + // Fill in feed nodes from config, if any provided. + for (const auto& feed_node : cfg.feed_nodes) { + const string feed_name = NodeName(feed_node); + if (feed_name.empty()) { + LOG(ERROR) << "Invalid feed node name " << feed_node + << ", skipping this input."; + return nullptr; + } + LOG(INFO) << "Will use feed node " << feed_name; + new_item->feed.emplace_back(feed_name, Tensor()); + } + // Attempt to detect the fetch node(s). 
if (meta_graph.collection_def().count("train_op") > 0) { const CollectionDef& nodes = meta_graph.collection_def().at("train_op"); @@ -339,9 +364,23 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( } } } + Tensor fake_input(type, shape); InitializeTensor(type, &fake_input); - new_item->feed.emplace_back(node.name(), fake_input); + + if (cfg.feed_nodes.empty()) { + // No specific feed nodes were given. Assume all placeholders are fed. + new_item->feed.emplace_back(node.name(), fake_input); + } else if (cfg.feed_nodes.count(node.name()) > 0) { + // If specific feed nodes were given, only update their tensors. + auto it = find_if(new_item->feed.begin(), new_item->feed.end(), + [&node](std::pair& f) { + return f.first == node.name(); + }); + QCHECK(it != new_item->feed.end()); + it->second = fake_input; + } + // Set the shape of the node in the graph. This is needed for statically // inferring shapes and is a no-op when dynamically inferring shapes as // the Placeholder shape will match the shape passed from new_item->feed. @@ -418,6 +457,16 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( return nullptr; } + if (cfg.prune_graph) { + VLOG(1) << "Pruning graph..."; + auto status = PruneGraph(new_item.get()); + if (!status.ok()) { + LOG(ERROR) << "Pruning failed: " << status.error_message(); + return nullptr; + } + VLOG(1) << "Pruning ran succesfully."; + } + // Validate feed, fetch and init nodes std::unordered_set nodes; for (const auto& node : new_item->graph.node()) { diff --git a/tensorflow/core/grappler/grappler_item_builder.h b/tensorflow/core/grappler/grappler_item_builder.h index 9a7f52228b..85151aabea 100644 --- a/tensorflow/core/grappler/grappler_item_builder.h +++ b/tensorflow/core/grappler/grappler_item_builder.h @@ -17,6 +17,7 @@ limitations under the License. 
#define TENSORFLOW_GRAPPLER_GRAPPLER_ITEM_BUILDER_H_ #include +#include #include #include "tensorflow/core/grappler/grappler_item.h" @@ -45,6 +46,10 @@ struct ItemConfig { bool erase_noinline_attributes = false; // If non-empty, override the directory of asset paths. string assets_directory_override; + // If true, runs ModelPruner on the graph. + bool prune_graph = false; + // Override feed nodes list. + std::set feed_nodes; }; // Factory method for creating a GrapplerItem from a MetaGraphDef. -- GitLab From d4d5e1510f2404ff1dafaa83171b0dcaec5fdfeb Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 10 Oct 2017 17:30:35 -0700 Subject: [PATCH 251/909] [XLA] Simplify trivial dynamic-slices. Also make the dynamic-update-slice simplification respect the is_layout_sensitive_ flag in algebraic-simplifier While we're here, make the algebraic-simplifier test use the new HloVerifiedTestBase class. PiperOrigin-RevId: 171759708 --- tensorflow/compiler/xla/service/BUILD | 2 +- .../xla/service/algebraic_simplifier.cc | 8 ++++-- .../xla/service/algebraic_simplifier_test.cc | 27 +++++++++++++++++-- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 0c20a05714..c1bb7107b6 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1053,7 +1053,7 @@ tf_cc_test( "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:hlo_verified_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep "//tensorflow/core:lib", "//tensorflow/core:test", diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index a197a2accc..90ab7700ea 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ 
b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -1264,6 +1264,11 @@ Status AlgebraicSimplifierVisitor::HandleDynamicSlice( if (ShapeUtil::IsScalar(dynamic_slice->shape())) { return ReplaceInstruction(dynamic_slice, operand); } + // DynamicSlice where operand has the same size as the output and + // start_indices are all zero is simply equal to operand. + if (IsAll(start_indices, 0) && SameShape(operand, dynamic_slice)) { + return ReplaceInstruction(dynamic_slice, operand); + } return Status::OK(); } @@ -1282,8 +1287,7 @@ Status AlgebraicSimplifierVisitor::HandleDynamicUpdateSlice( // not to affect the visible behavior of this op even when the indices are out // of range. Currently dynamic-update-slice wraps out-of-range indices, so // we can only remove the op if its indices never wrap.) - if (start_indices->IsConstant() && start_indices->literal().IsAll(0) && - ShapeUtil::Compatible(dynamic_update_slice->shape(), update->shape())) { + if (IsAll(start_indices, 0) && SameShape(dynamic_update_slice, update)) { return ReplaceInstruction(dynamic_update_slice, update); } return Status::OK(); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 52231b53d4..f45e541b2c 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -28,7 +28,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_pass_fix.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -47,7 +47,7 @@ AlgebraicSimplifier::ValidBitcastCallback non_bitcasting_callback() { return [](const Shape&, const Shape&) { return false; }; } -class AlgebraicSimplifierTest : public HloTestBase { +class AlgebraicSimplifierTest : public HloVerifiedTestBase { public: // Makes a computation that contains a loop that runs num_iters times. HloComputation* MakeSimpleLoop(HloModule* module, int num_iters); @@ -2213,6 +2213,29 @@ TEST_F(AlgebraicSimplifierTest, NotRemovedIfContainsNonRemovableInstruction) { EXPECT_FALSE(simplifier.Run(&module).ValueOrDie()); } +// A dynamic-slice is trivial if its start indices are all zeroes and the size +// of its input equals the size of its output. In this case, the dynamic slice +// is equal to its input. 
+TEST_F(AlgebraicSimplifierTest, TrivialDynamicSlice) { + HloComputation::Builder builder(TestName()); + + Shape shape = ShapeUtil::MakeShape(F32, {10, 100, 1000}); + builder.AddInstruction(HloInstruction::CreateDynamicSlice( + shape, + builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "slice_from")), + builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR1({0, 0, 0}))), + /*slice_sizes=*/{10, 100, 1000})); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + EXPECT_THAT(computation->root_instruction(), op::Parameter()); +} + // A dynamic-update-slice is trivial if its start indices are all zeroes and the // size of its "update" equals the size of its output. In this case, the // dynamic-update-slice is equal to its update. -- GitLab From 9a7e849472c954470de889cc8873223e4db1e4df Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 18:40:50 -0700 Subject: [PATCH 252/909] * Passing `training_features` (without weight column) instead of `features` into GradientBoostedDecisionTreeModel. * Export GTFlow model into generic format with features defined in proto. 
PiperOrigin-RevId: 171766066 --- .../estimator_batch/custom_export_strategy.py | 9 +++++++-- .../contrib/boosted_trees/estimator_batch/model.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py index 7773125c16..a800c3ddc7 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py @@ -96,7 +96,8 @@ def make_custom_export_strategy(name, def convert_to_universal_format(dtec, sorted_feature_names, num_dense, num_sparse_float, - num_sparse_int): + num_sparse_int, + feature_name_to_proto=None): """Convert GTFlow trees to universal format.""" del num_sparse_int # unused. model_and_features = generic_tree_model_pb2.ModelAndFeatures() @@ -104,7 +105,11 @@ def convert_to_universal_format(dtec, sorted_feature_names, # feature is processed before it's fed to the model (e.g. bucketing # information). As of now, this serves as a list of features the model uses. 
for feature_name in sorted_feature_names: - model_and_features.features[feature_name].SetInParent() + if not feature_name_to_proto: + model_and_features.features[feature_name].SetInParent() + else: + model_and_features.features[feature_name].CopyFrom( + feature_name_to_proto[feature_name]) model = model_and_features.model model.ensemble.summation_combination_technique.SetInParent() for tree_idx in range(len(dtec.trees)): diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/model.py b/tensorflow/contrib/boosted_trees/estimator_batch/model.py index 8cda5c8f2b..c6455a7ea3 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/model.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/model.py @@ -93,7 +93,7 @@ def model_builder(features, labels, mode, params, config): learner_config=learner_config, feature_columns=feature_columns, logits_dimension=head.logits_dimension, - features=features) + features=training_features) with ops.name_scope("gbdt", "gbdt_optimizer"): predictions_dict = gbdt_model.predict(mode) logits = predictions_dict["predictions"] -- GitLab From 9885aa8636c51bdd4a155b504b7c8c22bdf22289 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 10 Oct 2017 19:27:45 -0700 Subject: [PATCH 253/909] Add some CPU specific test cases PiperOrigin-RevId: 171769504 --- tensorflow/BUILD | 1 + tensorflow/compiler/xla/tests/cpu/BUILD | 99 ++++++ .../xla/tests/cpu/cpu_bytesizeof_test.cc | 37 ++ .../compiler/xla/tests/cpu/cpu_codegen_test.h | 30 ++ .../tests/cpu/cpu_external_constants_test.cc | 73 ++++ .../compiler/xla/tests/cpu/cpu_fusion_test.cc | 330 ++++++++++++++++++ .../xla/tests/cpu/cpu_intrinsic_test.cc | 150 ++++++++ 7 files changed, 720 insertions(+) create mode 100644 tensorflow/compiler/xla/tests/cpu/BUILD create mode 100644 tensorflow/compiler/xla/tests/cpu/cpu_bytesizeof_test.cc create mode 100644 tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h create mode 100644 tensorflow/compiler/xla/tests/cpu/cpu_external_constants_test.cc 
create mode 100644 tensorflow/compiler/xla/tests/cpu/cpu_fusion_test.cc create mode 100644 tensorflow/compiler/xla/tests/cpu/cpu_intrinsic_test.cc diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 5bb31d7df1..065e61efca 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -330,6 +330,7 @@ filegroup( "//tensorflow/compiler/xla/service/interpreter:all_files", "//tensorflow/compiler/xla/service/llvm_ir:all_files", "//tensorflow/compiler/xla/tests:all_files", + "//tensorflow/compiler/xla/tests/cpu:all_files", "//tensorflow/compiler/xla/tools:all_files", "//tensorflow/contrib:all_files", "//tensorflow/contrib/all_reduce:all_files", diff --git a/tensorflow/compiler/xla/tests/cpu/BUILD b/tensorflow/compiler/xla/tests/cpu/BUILD new file mode 100644 index 0000000000..e0253b6a6b --- /dev/null +++ b/tensorflow/compiler/xla/tests/cpu/BUILD @@ -0,0 +1,99 @@ +# Description: +# Tests for CPU, in C++, against the XLA API, using the in-process +# client library. + +licenses(["notice"]) # Apache 2.0 + +package( + default_visibility = [":friends"], +) + +package_group( + name = "friends", + includes = [ + "//tensorflow/compiler/xla:friends", + ], +) + +load("//tensorflow:tensorflow.bzl", "tf_cc_test") + +tf_cc_test( + name = "cpu_fusion_test", + srcs = ["cpu_fusion_test.cc"], + deps = [ + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/cpu:cpu_instruction_fusion", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "cpu_bytesizeof_test", + srcs = ["cpu_bytesizeof_test.cc"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", + 
"//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "cpu_external_constants_test", + srcs = ["cpu_external_constants_test.cc"], + deps = [ + ":cpu_codegen_test", + "//tensorflow/compiler/xla:array2d", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/tests:filecheck", + "//tensorflow/core:test", + ], +) + +cc_library( + name = "cpu_codegen_test", + testonly = True, + hdrs = ["cpu_codegen_test.h"], + deps = [ + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/compiler/xla/tests:llvm_irgen_test_base", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_cc_test( + name = "cpu_intrinsic_test", + srcs = ["cpu_intrinsic_test.cc"], + deps = [ + ":cpu_codegen_test", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/cpu:cpu_compiler", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +# ----------------------------------------------------------------------------- + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/compiler/xla/tests/cpu/cpu_bytesizeof_test.cc b/tensorflow/compiler/xla/tests/cpu/cpu_bytesizeof_test.cc new file mode 100644 index 0000000000..3f2bbbd076 --- /dev/null +++ b/tensorflow/compiler/xla/tests/cpu/cpu_bytesizeof_test.cc @@ -0,0 +1,37 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/core/platform/test.h" + +class CpuByteSizeOfTest : public ::testing::Test {}; + +TEST_F(CpuByteSizeOfTest, ARM32) { + llvm::DataLayout data_layout( + "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"); + auto tuple_shape = + xla::ShapeUtil::MakeTupleShape({xla::ShapeUtil::MakeShape(xla::F32, {})}); + EXPECT_EQ(xla::llvm_ir::ByteSizeOf(tuple_shape, data_layout), + data_layout.getPointerSize()); +} + +TEST_F(CpuByteSizeOfTest, ARM64) { + llvm::DataLayout data_layout("e-m:e-i64:64-i128:128-n32:64-S128"); + auto tuple_shape = + xla::ShapeUtil::MakeTupleShape({xla::ShapeUtil::MakeShape(xla::F32, {})}); + EXPECT_EQ(xla::llvm_ir::ByteSizeOf(tuple_shape, data_layout), + data_layout.getPointerSize()); +} diff --git a/tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h b/tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h new file mode 100644 index 0000000000..a6ca00b07d --- /dev/null +++ b/tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h @@ -0,0 +1,30 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef PLATFORMS_XLA_TESTS_CPU_CPU_CODEGEN_TEST_H_ +#define PLATFORMS_XLA_TESTS_CPU_CPU_CODEGEN_TEST_H_ + +#include "tensorflow/compiler/xla/tests/llvm_irgen_test_base.h" + +namespace xla { +namespace cpu { + +// Tests that verify IR emitted by the CPU backend is as expected. +class CpuCodegenTest : public LLVMIRGenTestBase {}; + +} // namespace cpu +} // namespace xla + +#endif // PLATFORMS_XLA_TESTS_CPU_CPU_CODEGEN_TEST_H_ diff --git a/tensorflow/compiler/xla/tests/cpu/cpu_external_constants_test.cc b/tensorflow/compiler/xla/tests/cpu/cpu_external_constants_test.cc new file mode 100644 index 0000000000..14f223e05e --- /dev/null +++ b/tensorflow/compiler/xla/tests/cpu/cpu_external_constants_test.cc @@ -0,0 +1,73 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h" +#include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace cpu { +namespace { +class CpuExternalConstantsTest : public CpuCodegenTest { + public: + void TestWithArray(int64 rows, int64 cols, const char* filecheck_pattern) { + HloComputation::Builder builder(TestName()); + + Array2D backing_array(rows, cols); + backing_array.FillUnique(); + + auto shape = ShapeUtil::MakeShape(F32, {rows, cols}); + + HloInstruction* constant = + builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR2FromArray2D(backing_array))); + HloInstruction* param = + builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "x")); + builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param, constant)); + + std::unique_ptr module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + CompileAndVerifyIr(std::move(module), filecheck_pattern, + /*match_optimized_ir=*/false); + } +}; + +TEST_F(CpuExternalConstantsTest, Basic) { + TestWithArray(/*rows=*/1024, /*cols=*/1024, R"( +CHECK: @constant_global_0 = external constant [1024 x [1024 x float]], align 16 +)"); +} + +TEST_F(CpuExternalConstantsTest, BasicNegative) { + // The constant array in this test case is small enough that there is no need + // to externalize it. 
+ TestWithArray(/*rows=*/4, /*cols=*/4, R"( +CHECK-NOT: @constant_global_0 = external constant [4 x [4 x float]], align 8 +CHECK: @0 = private constant [4 x [4 x float]] {{.*}}, align 8 +)"); +} +} // namespace +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/cpu/cpu_fusion_test.cc b/tensorflow/compiler/xla/tests/cpu/cpu_fusion_test.cc new file mode 100644 index 0000000000..9231d3960e --- /dev/null +++ b/tensorflow/compiler/xla/tests/cpu/cpu_fusion_test.cc @@ -0,0 +1,330 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include + +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace cpu { +namespace { + +class CpuFusionTest : public HloTestBase { + protected: + CpuFusionTest() {} + + ErrorSpec error_spec_{0.0001, 1e-5}; +}; + +TEST_F(CpuFusionTest, FuseTwoElementwiseOps) { + auto builder = HloComputation::Builder(TestName()); + auto input_literal1 = Literal::CreateR1({1.0, 2.0, 3.0}); + auto input_literal2 = Literal::CreateR1({-2.0, -42.0, 2.0}); + Shape vshape = input_literal1->shape(); + + auto input1 = builder.AddInstruction( + HloInstruction::CreateConstant(std::move(input_literal1))); + auto input2 = builder.AddInstruction( + HloInstruction::CreateConstant(std::move(input_literal2))); + + auto add1 = builder.AddInstruction( + HloInstruction::CreateBinary(vshape, HloOpcode::kAdd, input1, input2)); + builder.AddInstruction( + HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, add1)); + + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + CpuInstructionFusion fusion; + EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie()); + + // The computation root instruction was fused. Verify the fusion instruction + // is now the root. 
+ auto computation = module->entry_computation(); + auto fusion_instruction = computation->root_instruction(); + EXPECT_EQ(HloOpcode::kFusion, fusion_instruction->opcode()); + EXPECT_EQ(HloOpcode::kNegate, + fusion_instruction->fused_expression_root()->opcode()); + // There should be four fused instructions: 2 parameters, the add, and the + // negate. + EXPECT_EQ(4, fusion_instruction->fused_instruction_count()); + + // Compile and execute the computation. + auto result = ExecuteAndTransfer(std::move(module), {}); + + // Check the output correctness. + LiteralTestUtil::ExpectR1Near({1.0, 40.0, -5.0}, *result, error_spec_); +} + +TEST_F(CpuFusionTest, FuseElementwiseOpChain) { + auto builder = HloComputation::Builder(TestName()); + auto input_literal = Literal::CreateR1({-1.5, -2.5, -3.0}); + Shape vshape = input_literal->shape(); + + auto input = builder.AddInstruction( + HloInstruction::CreateConstant(std::move(input_literal))); + auto negate = builder.AddInstruction( + HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, input)); + auto ceil = builder.AddInstruction( + HloInstruction::CreateUnary(vshape, HloOpcode::kCeil, negate)); + auto exp = builder.AddInstruction( + HloInstruction::CreateUnary(vshape, HloOpcode::kExp, ceil)); + auto floor = builder.AddInstruction( + HloInstruction::CreateUnary(vshape, HloOpcode::kFloor, exp)); + auto two = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(2.0))); + builder.AddInstruction( + HloInstruction::CreateBinary(vshape, HloOpcode::kMultiply, two, floor)); + + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + CpuInstructionFusion fusion; + EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie()); + + // The computation root instruction was fused. Verify the fusion instruction + // is now the root. 
+ auto computation = module->entry_computation(); + auto fusion_instruction = computation->root_instruction(); + EXPECT_EQ(HloOpcode::kFusion, fusion_instruction->opcode()); + EXPECT_EQ(HloOpcode::kMultiply, + fusion_instruction->fused_expression_root()->opcode()); + // There should be 7 fused instructions: 2 parameters and the fused + // operations. + EXPECT_EQ(7, fusion_instruction->fused_instruction_count()); + + // Compile and execute the computation. + auto result = ExecuteAndTransfer(std::move(module), {}); + + // Check the output correctness. + LiteralTestUtil::ExpectR1Near({14.0, 40.0, 40.0}, *result, + error_spec_); +} + +TEST_F(CpuFusionTest, ElementwiseOpChainWithNonfusableInstruction) { + // Test a chain of fusable ops with a non-fusable op (a reduce) thrown in the + // middle. + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + auto input_literal = Literal::CreateR1({-1.5, -2.5, -3.0}); + Shape vshape = input_literal->shape(); + + auto input = builder.AddInstruction( + HloInstruction::CreateConstant(std::move(input_literal))); + auto negate = builder.AddInstruction( + HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, input)); + auto ceil = builder.AddInstruction( + HloInstruction::CreateUnary(vshape, HloOpcode::kCeil, negate)); + + auto cshape = ShapeUtil::MakeShape(F32, {6}); + auto concatenate = builder.AddInstruction( + HloInstruction::CreateConcatenate(cshape, {ceil, ceil}, /*dimension=*/0)); + + // Build an x+y computation to use in a reduce. 
+ Shape r0f32 = ShapeUtil::MakeShape(F32, {}); + auto embedded_builder = HloComputation::Builder("f32+f32"); + embedded_builder.AddInstruction(HloInstruction::CreateBinary( + r0f32, HloOpcode::kAdd, + embedded_builder.AddInstruction( + HloInstruction::CreateParameter(0, r0f32, "x")), + embedded_builder.AddInstruction( + HloInstruction::CreateParameter(1, r0f32, "y")))); + auto add_f32 = module->AddEmbeddedComputation(embedded_builder.Build()); + + // This is a nop reduction. + auto reduce = builder.AddInstruction(HloInstruction::CreateReduce( + cshape, + builder.AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeShape(F32, {6, 1}), concatenate)), + /*init_value=*/ + builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(0))), + /*dimensions_to_reduce=*/{1}, add_f32)); + + auto exp = builder.AddInstruction( + HloInstruction::CreateUnary(cshape, HloOpcode::kExp, reduce)); + auto floor = builder.AddInstruction( + HloInstruction::CreateUnary(cshape, HloOpcode::kFloor, exp)); + auto two = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(2.0))); + builder.AddInstruction( + HloInstruction::CreateBinary(cshape, HloOpcode::kMultiply, two, floor)); + + module->AddEntryComputation(builder.Build()); + + CpuInstructionFusion fusion; + EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie()); + + // The computation root instruction was fused. Verify the fusion instruction + // is now the root. + auto computation = module->entry_computation(); + + auto fusion_instruction1 = computation->root_instruction(); + EXPECT_EQ(HloOpcode::kFusion, fusion_instruction1->opcode()); + EXPECT_EQ(HloOpcode::kMultiply, + fusion_instruction1->fused_expression_root()->opcode()); + // There should be 5 fused instructions in the root fusion instruction: 2 + // parameters, multiply, floor, and exp. 
+ EXPECT_EQ(5, fusion_instruction1->fused_instruction_count()) + << fusion_instruction1->fused_instructions_computation()->ToString(); + + auto fusion_instruction2 = reduce->operand(0); + EXPECT_EQ(HloOpcode::kFusion, fusion_instruction1->opcode()); + EXPECT_EQ(HloOpcode::kReshape, + fusion_instruction2->fused_expression_root()->opcode()); + // There should be 5 fused instructions in the second fusion instruction: 1 + // parameter, negate, ceil, concat, and reshape. + EXPECT_EQ(5, fusion_instruction2->fused_instruction_count()) + << fusion_instruction2->fused_instructions_computation()->ToString(); + + // Compile and execute the computation. + auto result = ExecuteAndTransfer(std::move(module), {}); + + // Check the output correctness. + LiteralTestUtil::ExpectR1Near({14.0, 40.0, 40.0, 14.0, 40.0, 40.0}, + *result, error_spec_); +} + +TEST_F(CpuFusionTest, TestOperandOrderToAvoidDuplication) { + // Test that the operands of an instruction to be fused are considered in the + // proper order to avoid duplication. Test input: + // + // constant = {...} + // negate = neg(constant) + // ceil = ceil(negate) + // add1 = add(negate, ceil) + // add2 = add(ceil, negate) + // + // In this example, the operands of both add1 and add2 should be fused in the + // order {ceil, negate} even though they have different orders in their + // operand vectors. Test for this problem by counting the number of nodes in + // each fusion instruction to ensure that negate is not duplicated. 
+ auto builder = HloComputation::Builder(TestName()); + auto input_literal = Literal::CreateR1({1.0, 2.0, 3.0}); + Shape vshape = input_literal->shape(); + + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(std::move(input_literal))); + auto negate = builder.AddInstruction( + HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, constant)); + auto ceil = builder.AddInstruction( + HloInstruction::CreateUnary(vshape, HloOpcode::kCeil, negate)); + + auto add1 = builder.AddInstruction( + HloInstruction::CreateBinary(vshape, HloOpcode::kMultiply, negate, ceil)); + auto add2 = builder.AddInstruction( + HloInstruction::CreateBinary(vshape, HloOpcode::kMultiply, ceil, negate)); + + // Tie together the two adds with a tuple to create a single root. + auto result = + builder.AddInstruction(HloInstruction::CreateTuple({add1, add2})); + + // Create computation and module. + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + // Run fusion. + CpuInstructionFusion fusion; + EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie()); + + auto fusion1 = result->operand(0); + auto fusion2 = result->operand(1); + EXPECT_EQ(HloOpcode::kFusion, fusion1->opcode()); + EXPECT_EQ(HloOpcode::kFusion, fusion2->opcode()); + + // Each fusion instruction should have 4 fused instruction inside: add, ceil, + // negate, and the fused parameter. + EXPECT_EQ(4, fusion1->fused_instruction_count()); + EXPECT_EQ(4, fusion2->fused_instruction_count()); + + // Each fusion instruction should have one parameter and the parameter should + // be the constant. + EXPECT_EQ(1, fusion1->operand_count()); + EXPECT_EQ(constant, fusion1->operand(0)); + EXPECT_EQ(1, fusion2->operand_count()); + EXPECT_EQ(constant, fusion2->operand(0)); +} + +TEST_F(CpuFusionTest, DoNotDuplicateExpensiveOps) { + // Verify that expensive operations will not be fused if the fusion results in + // duplication. 
Test code: + // + // constant = 42.0 + // exp1 = exp(constant) + // negate1 = negate(exp1) + // exp2 = exp(constant) + // negate2 = negate(exp2) + // tuple = tuple(negate1, negate2, exp2) + // + // exp1 should be fused down into negate1, but exp2 will not be fused into + // negate2 because this will result in duplication of the expensive exp + // computation. The duplication is caused by the other use of exp2 in the + // tuple. + auto builder = HloComputation::Builder(TestName()); + auto input_literal1 = Literal::CreateR1({1.0, 2.0, 3.0}); + auto input_literal2 = Literal::CreateR1({-2.0, -42.0, 2.0}); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42.0))); + Shape shape = constant->shape(); + + auto exp1 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kExp, constant)); + auto negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, exp1)); + + auto exp2 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kExp, constant)); + auto negate2 = builder.AddInstruction( + HloInstruction::CreateUnary(shape, HloOpcode::kNegate, exp2)); + + auto tuple = builder.AddInstruction( + HloInstruction::CreateTuple({negate1, negate2, exp2})); + + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + CpuInstructionFusion fusion; + EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie()); + + // The only fusion instruction should be operand 0 of the tuple (formerly + // negate1). + EXPECT_EQ(HloOpcode::kFusion, tuple->operand(0)->opcode()); + EXPECT_EQ(HloOpcode::kNegate, tuple->operand(1)->opcode()); + EXPECT_EQ(HloOpcode::kExp, tuple->operand(2)->opcode()); + + auto fusion_inst = tuple->operand(0); + // There should be three fused instructions: negate2, exp2, and the fused + // parameter. 
+ EXPECT_EQ(3, fusion_inst->fused_instruction_count()); + EXPECT_EQ(1, fusion_inst->operand_count()); + EXPECT_EQ(constant, fusion_inst->operand(0)); +} + +} // namespace +} // namespace cpu +} // namespace xla diff --git a/tensorflow/compiler/xla/tests/cpu/cpu_intrinsic_test.cc b/tensorflow/compiler/xla/tests/cpu/cpu_intrinsic_test.cc new file mode 100644 index 0000000000..15a8a44e4c --- /dev/null +++ b/tensorflow/compiler/xla/tests/cpu/cpu_intrinsic_test.cc @@ -0,0 +1,150 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace cpu { +namespace { + +const char* const kTriple_x86_64 = "x86_64-pc-linux"; +const char* const kTriple_android_arm = "armv7-none-android"; + +struct IntrinsicTestSpec { + HloOpcode opcode; + tensorflow::StringPiece triple; + tensorflow::StringPiece features; + tensorflow::StringPiece check_lines; +}; + +// Tests that unary functions get lowered using intrinsic calls. 
+class CpuUnaryIntrinsicTest + : public CpuCodegenTest, + public ::testing::WithParamInterface { + public: + static string Name(const ::testing::TestParamInfo& info) { + auto spec = info.param; + + string opcode = HloOpcodeString(spec.opcode); + opcode[0] = toupper(opcode[0]); + + string triple{spec.triple.data(), spec.triple.size()}; + if (triple == kTriple_x86_64) { + triple = "x86_64"; + } else if (triple == kTriple_android_arm) { + triple = "android_arm"; + } else { + triple = "Unknown"; + } + + string features{spec.features.data(), spec.features.size()}; + if (!features.empty()) { + std::replace_if(features.begin(), features.end(), + [](char c) { return c != '_' && !isalnum(c); }, '_'); + } else { + features = ""; + } + + return tensorflow::strings::StrCat(opcode.c_str(), "_On_", triple.c_str(), + features.empty() ? "" : "_With", + features.c_str()); + } +}; + +// Creates a module with a call to the unary op, and tests if the +// compiler replaced it with a call to the intrinsic. +TEST_P(CpuUnaryIntrinsicTest, DoIt) { + HloComputation::Builder builder(TestName()); + IntrinsicTestSpec spec = GetParam(); + + auto param_shape = ShapeUtil::MakeShape(F32, {1024}); + HloInstruction* param = builder.AddInstruction( + HloInstruction::CreateParameter(0, param_shape, "input")); + builder.AddInstruction( + HloInstruction::CreateUnary(param_shape, spec.opcode, param)); + std::unique_ptr computation = builder.Build(); + + string triple{spec.triple.data(), spec.triple.size()}; + string features{spec.features.data(), spec.features.size()}; + + CpuAotCompilationOptions options{ + /*triple=*/triple, /*cpu_name=*/"", /*features=*/features, + /*entry_point_name=*/"entry", + /*relocation_model=*/CpuAotCompilationOptions::RelocationModel::Static}; + + auto hlo_module = CreateNewModule(); + hlo_module->AddEntryComputation(std::move(computation)); + + string check_lines{spec.check_lines.data(), spec.check_lines.size()}; + + CompileAheadOfTimeAndVerifyIr(std::move(hlo_module), 
options, check_lines, + /*match_optimized_ir=*/true); +} + +IntrinsicTestSpec CpuUnaryIntrinsicTestCases[] = { + IntrinsicTestSpec{ + HloOpcode::kExp, kTriple_x86_64, "+sse4.1", + R"(CHECK: call fast <4 x float> @__xla_cpu_runtime_ExpV4F32SSE(<4 x float> %wide.load))"}, + + IntrinsicTestSpec{ + HloOpcode::kExp, kTriple_x86_64, "+avx", + R"(CHECK: call fast <8 x float> @__xla_cpu_runtime_ExpV8F32AVX(<8 x float> %wide.load))"}, + + IntrinsicTestSpec{ + HloOpcode::kExp, kTriple_android_arm, "+neon", + R"(CHECK: call fast <4 x float> @__xla_cpu_runtime_ExpV4F32NEON(<4 x float> %wide.load))"}, + + IntrinsicTestSpec{ + HloOpcode::kLog, kTriple_x86_64, "+sse4.1", + R"(CHECK: call fast <4 x float> @__xla_cpu_runtime_LogV4F32SSE(<4 x float> %wide.load))"}, + + IntrinsicTestSpec{ + HloOpcode::kLog, kTriple_x86_64, "+avx", + R"(CHECK: call fast <8 x float> @__xla_cpu_runtime_LogV8F32AVX(<8 x float> %wide.load))"}, + + IntrinsicTestSpec{ + HloOpcode::kLog, kTriple_android_arm, "+neon", + R"(CHECK: call fast <4 x float> @__xla_cpu_runtime_LogV4F32NEON(<4 x float> %wide.load))"}, + + // Tanh is inlined, so we match a line from it instead of a function call. + + IntrinsicTestSpec{ + HloOpcode::kTanh, kTriple_x86_64, "", + R"(CHECK: fcmp fast uge <4 x float> %wide.load, )"}, + + IntrinsicTestSpec{ + HloOpcode::kTanh, kTriple_x86_64, "+avx", + R"(CHECK: fcmp fast uge <8 x float> %wide.load, )"}, + + IntrinsicTestSpec{ + HloOpcode::kTanh, kTriple_android_arm, "", + R"(CHECK: fcmp fast uge <4 x float> %wide.load, )"}}; + +INSTANTIATE_TEST_CASE_P(CpuUnaryIntrinsicTestInstantiation, + CpuUnaryIntrinsicTest, + ::testing::ValuesIn(CpuUnaryIntrinsicTestCases), + CpuUnaryIntrinsicTest::Name); + +} // namespace +} // namespace cpu +} // namespace xla -- GitLab From 00b368966c8c3e003d2a7ddf3c36165185ed0079 Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Tue, 10 Oct 2017 20:22:50 -0700 Subject: [PATCH 254/909] Minor code cleanup in grappler cost estimation. 
PiperOrigin-RevId: 171772766 --- .../grappler/costs/op_level_cost_estimator.cc | 27 ++++++++++--------- .../grappler/costs/op_level_cost_estimator.h | 13 +++++---- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index b25def7612..7a1295c91e 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -292,21 +292,21 @@ Costs OpLevelCostEstimator::PredictCosts(const OpContext& op_context) const { return costs; } -std::pair OpLevelCostEstimator::GetDeviceInfo( +OpLevelCostEstimator::DeviceInfo OpLevelCostEstimator::GetDeviceInfo( const DeviceProperties& device) const { double gflops = -1; - double bandwidth = -1; + double gb_per_sec = -1; if (device.type() == "CPU") { // Check if vector instructions are available, and refine performance // prediction based on this. // Frequencies are stored in MHz in the DeviceProperties. 
gflops = device.num_cores() * device.frequency() * 1e-3; - if (bandwidth < 0) { + if (gb_per_sec < 0) { if (device.bandwidth() > 0) { - bandwidth = device.bandwidth() / 1e6; + gb_per_sec = device.bandwidth() / 1e6; } else { - bandwidth = 32; + gb_per_sec = 32; } } } else if (device.type() == "GPU") { @@ -328,15 +328,15 @@ std::pair OpLevelCostEstimator::GetDeviceInfo( gflops = device.num_cores() * device.frequency() * 1e-3 * cores_per_multiprocessor * kOpsPerMac; if (device.bandwidth() > 0) { - bandwidth = device.bandwidth() / 1e6; + gb_per_sec = device.bandwidth() / 1e6; } else { - bandwidth = 100; + gb_per_sec = 100; } } - VLOG(1) << "Device: " << device.type() << " GFLOPS: " << gflops - << " Bandwidth: " << bandwidth; + VLOG(1) << "Device: " << device.type() << " gflops: " << gflops + << " gb_per_sec: " << gb_per_sec; - return std::make_pair(gflops, bandwidth); + return {gflops, gb_per_sec}; } Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const { @@ -382,8 +382,8 @@ Costs OpLevelCostEstimator::DummyExecutionTime( Costs OpLevelCostEstimator::PredictOpCountBasedCost( double operations, const OpInfo& op_features) const { - std::pair device_perf = GetDeviceInfo(op_features.device()); - Costs::NanoSeconds compute_cost(std::ceil(operations / device_perf.first)); + DeviceInfo device_perf = GetDeviceInfo(op_features.device()); + Costs::NanoSeconds compute_cost(std::ceil(operations / device_perf.gigaops)); VLOG(1) << "Op:" << op_features.op() << " GOps:" << operations / 1e9 << " Execution Time (ns):" << compute_cost.count(); @@ -394,7 +394,8 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost( CalculateOutputSize(op_features, &found_unknown_shapes); double total_io_size = total_input_size + total_output_size; - Costs::NanoSeconds memory_cost(std::ceil(total_io_size / device_perf.second)); + Costs::NanoSeconds memory_cost( + std::ceil(total_io_size / device_perf.gb_per_sec)); VLOG(1) << "Op:" << op_features.op() << " Size (KB):" << 
(total_io_size) / 1e3 << " Memory Time (ns):" << memory_cost.count(); diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h index 0e63299bcb..3a8385dd73 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h @@ -36,11 +36,14 @@ class OpLevelCostEstimator { virtual Costs PredictCosts(const OpContext& op_context) const; protected: - // Returns an estimate of device performance (in billions of operations - // executed per second) and memory bandwidth (in GigaBytes/second) for the - // specified device. - virtual std::pair GetDeviceInfo( - const DeviceProperties& device) const; + // Basic device performance info, sufficient for roofline estimate. + struct DeviceInfo { + double gigaops; // Billions of operations executed per second. + double gb_per_sec; // Bandwidth to main memory in GB per second. + }; + + // Returns basic device performance info. + virtual DeviceInfo GetDeviceInfo(const DeviceProperties& device) const; // For operations for which we haven't yet built estimates, returns a dummy // value based on input size. 
-- GitLab From 4385bb907f3decea03d73b3f0a725613fa49a8f4 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 10 Oct 2017 20:58:01 -0700 Subject: [PATCH 255/909] Automated g4 rollback of changelist 171769504 PiperOrigin-RevId: 171774816 --- tensorflow/BUILD | 1 - tensorflow/compiler/xla/tests/cpu/BUILD | 99 ------ .../xla/tests/cpu/cpu_bytesizeof_test.cc | 37 -- .../compiler/xla/tests/cpu/cpu_codegen_test.h | 30 -- .../tests/cpu/cpu_external_constants_test.cc | 73 ---- .../compiler/xla/tests/cpu/cpu_fusion_test.cc | 330 ------------------ .../xla/tests/cpu/cpu_intrinsic_test.cc | 150 -------- 7 files changed, 720 deletions(-) delete mode 100644 tensorflow/compiler/xla/tests/cpu/BUILD delete mode 100644 tensorflow/compiler/xla/tests/cpu/cpu_bytesizeof_test.cc delete mode 100644 tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h delete mode 100644 tensorflow/compiler/xla/tests/cpu/cpu_external_constants_test.cc delete mode 100644 tensorflow/compiler/xla/tests/cpu/cpu_fusion_test.cc delete mode 100644 tensorflow/compiler/xla/tests/cpu/cpu_intrinsic_test.cc diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 065e61efca..5bb31d7df1 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -330,7 +330,6 @@ filegroup( "//tensorflow/compiler/xla/service/interpreter:all_files", "//tensorflow/compiler/xla/service/llvm_ir:all_files", "//tensorflow/compiler/xla/tests:all_files", - "//tensorflow/compiler/xla/tests/cpu:all_files", "//tensorflow/compiler/xla/tools:all_files", "//tensorflow/contrib:all_files", "//tensorflow/contrib/all_reduce:all_files", diff --git a/tensorflow/compiler/xla/tests/cpu/BUILD b/tensorflow/compiler/xla/tests/cpu/BUILD deleted file mode 100644 index e0253b6a6b..0000000000 --- a/tensorflow/compiler/xla/tests/cpu/BUILD +++ /dev/null @@ -1,99 +0,0 @@ -# Description: -# Tests for CPU, in C++, against the XLA API, using the in-process -# client library. 
- -licenses(["notice"]) # Apache 2.0 - -package( - default_visibility = [":friends"], -) - -package_group( - name = "friends", - includes = [ - "//tensorflow/compiler/xla:friends", - ], -) - -load("//tensorflow:tensorflow.bzl", "tf_cc_test") - -tf_cc_test( - name = "cpu_fusion_test", - srcs = ["cpu_fusion_test.cc"], - deps = [ - "//tensorflow/compiler/xla:literal_util", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:util", - "//tensorflow/compiler/xla:xla_data_proto", - "//tensorflow/compiler/xla/service:cpu_plugin", - "//tensorflow/compiler/xla/service:hlo", - "//tensorflow/compiler/xla/service/cpu:cpu_instruction_fusion", - "//tensorflow/compiler/xla/tests:hlo_test_base", - "//tensorflow/compiler/xla/tests:literal_test_util", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - ], -) - -tf_cc_test( - name = "cpu_bytesizeof_test", - srcs = ["cpu_bytesizeof_test.cc"], - deps = [ - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - ], -) - -tf_cc_test( - name = "cpu_external_constants_test", - srcs = ["cpu_external_constants_test.cc"], - deps = [ - ":cpu_codegen_test", - "//tensorflow/compiler/xla:array2d", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla/service:hlo", - "//tensorflow/compiler/xla/tests:filecheck", - "//tensorflow/core:test", - ], -) - -cc_library( - name = "cpu_codegen_test", - testonly = True, - hdrs = ["cpu_codegen_test.h"], - deps = [ - "//tensorflow/compiler/xla/service:cpu_plugin", - "//tensorflow/compiler/xla/tests:llvm_irgen_test_base", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - ], -) - -tf_cc_test( - name = "cpu_intrinsic_test", - srcs = ["cpu_intrinsic_test.cc"], - deps = [ - ":cpu_codegen_test", - "//tensorflow/compiler/xla/service:hlo", - "//tensorflow/compiler/xla/service/cpu:cpu_compiler", - "//tensorflow/core:lib", - "//tensorflow/core:test", - 
"//tensorflow/core:test_main", - ], -) - -# ----------------------------------------------------------------------------- - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) diff --git a/tensorflow/compiler/xla/tests/cpu/cpu_bytesizeof_test.cc b/tensorflow/compiler/xla/tests/cpu/cpu_bytesizeof_test.cc deleted file mode 100644 index 3f2bbbd076..0000000000 --- a/tensorflow/compiler/xla/tests/cpu/cpu_bytesizeof_test.cc +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/core/platform/test.h" - -class CpuByteSizeOfTest : public ::testing::Test {}; - -TEST_F(CpuByteSizeOfTest, ARM32) { - llvm::DataLayout data_layout( - "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"); - auto tuple_shape = - xla::ShapeUtil::MakeTupleShape({xla::ShapeUtil::MakeShape(xla::F32, {})}); - EXPECT_EQ(xla::llvm_ir::ByteSizeOf(tuple_shape, data_layout), - data_layout.getPointerSize()); -} - -TEST_F(CpuByteSizeOfTest, ARM64) { - llvm::DataLayout data_layout("e-m:e-i64:64-i128:128-n32:64-S128"); - auto tuple_shape = - xla::ShapeUtil::MakeTupleShape({xla::ShapeUtil::MakeShape(xla::F32, {})}); - EXPECT_EQ(xla::llvm_ir::ByteSizeOf(tuple_shape, data_layout), - data_layout.getPointerSize()); -} diff --git a/tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h b/tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h deleted file mode 100644 index a6ca00b07d..0000000000 --- a/tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef PLATFORMS_XLA_TESTS_CPU_CPU_CODEGEN_TEST_H_ -#define PLATFORMS_XLA_TESTS_CPU_CPU_CODEGEN_TEST_H_ - -#include "tensorflow/compiler/xla/tests/llvm_irgen_test_base.h" - -namespace xla { -namespace cpu { - -// Tests that verify IR emitted by the CPU backend is as expected. -class CpuCodegenTest : public LLVMIRGenTestBase {}; - -} // namespace cpu -} // namespace xla - -#endif // PLATFORMS_XLA_TESTS_CPU_CPU_CODEGEN_TEST_H_ diff --git a/tensorflow/compiler/xla/tests/cpu/cpu_external_constants_test.cc b/tensorflow/compiler/xla/tests/cpu/cpu_external_constants_test.cc deleted file mode 100644 index 14f223e05e..0000000000 --- a/tensorflow/compiler/xla/tests/cpu/cpu_external_constants_test.cc +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include -#include - -#include "tensorflow/compiler/xla/array2d.h" -#include "tensorflow/compiler/xla/service/hlo_computation.h" -#include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/service/hlo_module.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h" -#include "tensorflow/compiler/xla/tests/filecheck.h" -#include "tensorflow/core/platform/test.h" - -namespace xla { -namespace cpu { -namespace { -class CpuExternalConstantsTest : public CpuCodegenTest { - public: - void TestWithArray(int64 rows, int64 cols, const char* filecheck_pattern) { - HloComputation::Builder builder(TestName()); - - Array2D backing_array(rows, cols); - backing_array.FillUnique(); - - auto shape = ShapeUtil::MakeShape(F32, {rows, cols}); - - HloInstruction* constant = - builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR2FromArray2D(backing_array))); - HloInstruction* param = - builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "x")); - builder.AddInstruction( - HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param, constant)); - - std::unique_ptr module = CreateNewModule(); - module->AddEntryComputation(builder.Build()); - - CompileAndVerifyIr(std::move(module), filecheck_pattern, - /*match_optimized_ir=*/false); - } -}; - -TEST_F(CpuExternalConstantsTest, Basic) { - TestWithArray(/*rows=*/1024, /*cols=*/1024, R"( -CHECK: @constant_global_0 = external constant [1024 x [1024 x float]], align 16 -)"); -} - -TEST_F(CpuExternalConstantsTest, BasicNegative) { - // The constant array in this test case is small enough that there is no need - // to externalize it. 
- TestWithArray(/*rows=*/4, /*cols=*/4, R"( -CHECK-NOT: @constant_global_0 = external constant [4 x [4 x float]], align 8 -CHECK: @0 = private constant [4 x [4 x float]] {{.*}}, align 8 -)"); -} -} // namespace -} // namespace cpu -} // namespace xla diff --git a/tensorflow/compiler/xla/tests/cpu/cpu_fusion_test.cc b/tensorflow/compiler/xla/tests/cpu/cpu_fusion_test.cc deleted file mode 100644 index 9231d3960e..0000000000 --- a/tensorflow/compiler/xla/tests/cpu/cpu_fusion_test.cc +++ /dev/null @@ -1,330 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include -#include -#include - -#include "tensorflow/compiler/xla/literal_util.h" -#include "tensorflow/compiler/xla/ptr_util.h" -#include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h" -#include "tensorflow/compiler/xla/service/hlo_computation.h" -#include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/service/hlo_module.h" -#include "tensorflow/compiler/xla/service/hlo_opcode.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/tests/literal_test_util.h" -#include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/platform/test.h" - -namespace xla { -namespace cpu { -namespace { - -class CpuFusionTest : public HloTestBase { - protected: - CpuFusionTest() {} - - ErrorSpec error_spec_{0.0001, 1e-5}; -}; - -TEST_F(CpuFusionTest, FuseTwoElementwiseOps) { - auto builder = HloComputation::Builder(TestName()); - auto input_literal1 = Literal::CreateR1({1.0, 2.0, 3.0}); - auto input_literal2 = Literal::CreateR1({-2.0, -42.0, 2.0}); - Shape vshape = input_literal1->shape(); - - auto input1 = builder.AddInstruction( - HloInstruction::CreateConstant(std::move(input_literal1))); - auto input2 = builder.AddInstruction( - HloInstruction::CreateConstant(std::move(input_literal2))); - - auto add1 = builder.AddInstruction( - HloInstruction::CreateBinary(vshape, HloOpcode::kAdd, input1, input2)); - builder.AddInstruction( - HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, add1)); - - auto module = CreateNewModule(); - module->AddEntryComputation(builder.Build()); - - CpuInstructionFusion fusion; - EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie()); - - // The computation root instruction was fused. Verify the fusion instruction - // is now the root. 
- auto computation = module->entry_computation(); - auto fusion_instruction = computation->root_instruction(); - EXPECT_EQ(HloOpcode::kFusion, fusion_instruction->opcode()); - EXPECT_EQ(HloOpcode::kNegate, - fusion_instruction->fused_expression_root()->opcode()); - // There should be four fused instructions: 2 parameters, the add, and the - // negate. - EXPECT_EQ(4, fusion_instruction->fused_instruction_count()); - - // Compile and execute the computation. - auto result = ExecuteAndTransfer(std::move(module), {}); - - // Check the output correctness. - LiteralTestUtil::ExpectR1Near({1.0, 40.0, -5.0}, *result, error_spec_); -} - -TEST_F(CpuFusionTest, FuseElementwiseOpChain) { - auto builder = HloComputation::Builder(TestName()); - auto input_literal = Literal::CreateR1({-1.5, -2.5, -3.0}); - Shape vshape = input_literal->shape(); - - auto input = builder.AddInstruction( - HloInstruction::CreateConstant(std::move(input_literal))); - auto negate = builder.AddInstruction( - HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, input)); - auto ceil = builder.AddInstruction( - HloInstruction::CreateUnary(vshape, HloOpcode::kCeil, negate)); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(vshape, HloOpcode::kExp, ceil)); - auto floor = builder.AddInstruction( - HloInstruction::CreateUnary(vshape, HloOpcode::kFloor, exp)); - auto two = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(2.0))); - builder.AddInstruction( - HloInstruction::CreateBinary(vshape, HloOpcode::kMultiply, two, floor)); - - auto module = CreateNewModule(); - module->AddEntryComputation(builder.Build()); - - CpuInstructionFusion fusion; - EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie()); - - // The computation root instruction was fused. Verify the fusion instruction - // is now the root. 
- auto computation = module->entry_computation(); - auto fusion_instruction = computation->root_instruction(); - EXPECT_EQ(HloOpcode::kFusion, fusion_instruction->opcode()); - EXPECT_EQ(HloOpcode::kMultiply, - fusion_instruction->fused_expression_root()->opcode()); - // There should be 7 fused instructions: 2 parameters and the fused - // operations. - EXPECT_EQ(7, fusion_instruction->fused_instruction_count()); - - // Compile and execute the computation. - auto result = ExecuteAndTransfer(std::move(module), {}); - - // Check the output correctness. - LiteralTestUtil::ExpectR1Near({14.0, 40.0, 40.0}, *result, - error_spec_); -} - -TEST_F(CpuFusionTest, ElementwiseOpChainWithNonfusableInstruction) { - // Test a chain of fusable ops with a non-fusable op (a reduce) thrown in the - // middle. - auto module = CreateNewModule(); - auto builder = HloComputation::Builder(TestName()); - auto input_literal = Literal::CreateR1({-1.5, -2.5, -3.0}); - Shape vshape = input_literal->shape(); - - auto input = builder.AddInstruction( - HloInstruction::CreateConstant(std::move(input_literal))); - auto negate = builder.AddInstruction( - HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, input)); - auto ceil = builder.AddInstruction( - HloInstruction::CreateUnary(vshape, HloOpcode::kCeil, negate)); - - auto cshape = ShapeUtil::MakeShape(F32, {6}); - auto concatenate = builder.AddInstruction( - HloInstruction::CreateConcatenate(cshape, {ceil, ceil}, /*dimension=*/0)); - - // Build an x+y computation to use in a reduce. 
- Shape r0f32 = ShapeUtil::MakeShape(F32, {}); - auto embedded_builder = HloComputation::Builder("f32+f32"); - embedded_builder.AddInstruction(HloInstruction::CreateBinary( - r0f32, HloOpcode::kAdd, - embedded_builder.AddInstruction( - HloInstruction::CreateParameter(0, r0f32, "x")), - embedded_builder.AddInstruction( - HloInstruction::CreateParameter(1, r0f32, "y")))); - auto add_f32 = module->AddEmbeddedComputation(embedded_builder.Build()); - - // This is a nop reduction. - auto reduce = builder.AddInstruction(HloInstruction::CreateReduce( - cshape, - builder.AddInstruction(HloInstruction::CreateReshape( - ShapeUtil::MakeShape(F32, {6, 1}), concatenate)), - /*init_value=*/ - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0))), - /*dimensions_to_reduce=*/{1}, add_f32)); - - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(cshape, HloOpcode::kExp, reduce)); - auto floor = builder.AddInstruction( - HloInstruction::CreateUnary(cshape, HloOpcode::kFloor, exp)); - auto two = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(2.0))); - builder.AddInstruction( - HloInstruction::CreateBinary(cshape, HloOpcode::kMultiply, two, floor)); - - module->AddEntryComputation(builder.Build()); - - CpuInstructionFusion fusion; - EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie()); - - // The computation root instruction was fused. Verify the fusion instruction - // is now the root. - auto computation = module->entry_computation(); - - auto fusion_instruction1 = computation->root_instruction(); - EXPECT_EQ(HloOpcode::kFusion, fusion_instruction1->opcode()); - EXPECT_EQ(HloOpcode::kMultiply, - fusion_instruction1->fused_expression_root()->opcode()); - // There should be 5 fused instructions in the root fusion instruction: 2 - // parameters, multiply, floor, and exp. 
- EXPECT_EQ(5, fusion_instruction1->fused_instruction_count()) - << fusion_instruction1->fused_instructions_computation()->ToString(); - - auto fusion_instruction2 = reduce->operand(0); - EXPECT_EQ(HloOpcode::kFusion, fusion_instruction1->opcode()); - EXPECT_EQ(HloOpcode::kReshape, - fusion_instruction2->fused_expression_root()->opcode()); - // There should be 5 fused instructions in the second fusion instruction: 1 - // parameter, negate, ceil, concat, and reshape. - EXPECT_EQ(5, fusion_instruction2->fused_instruction_count()) - << fusion_instruction2->fused_instructions_computation()->ToString(); - - // Compile and execute the computation. - auto result = ExecuteAndTransfer(std::move(module), {}); - - // Check the output correctness. - LiteralTestUtil::ExpectR1Near({14.0, 40.0, 40.0, 14.0, 40.0, 40.0}, - *result, error_spec_); -} - -TEST_F(CpuFusionTest, TestOperandOrderToAvoidDuplication) { - // Test that the operands of an instruction to be fused are considered in the - // proper order to avoid duplication. Test input: - // - // constant = {...} - // negate = neg(constant) - // ceil = ceil(negate) - // add1 = add(negate, ceil) - // add2 = add(ceil, negate) - // - // In this example, the operands of both add1 and add2 should be fused in the - // order {ceil, negate} even though they have different orders in their - // operand vectors. Test for this problem by counting the number of nodes in - // each fusion instruction to ensure that negate is not duplicated. 
- auto builder = HloComputation::Builder(TestName()); - auto input_literal = Literal::CreateR1({1.0, 2.0, 3.0}); - Shape vshape = input_literal->shape(); - - auto constant = builder.AddInstruction( - HloInstruction::CreateConstant(std::move(input_literal))); - auto negate = builder.AddInstruction( - HloInstruction::CreateUnary(vshape, HloOpcode::kNegate, constant)); - auto ceil = builder.AddInstruction( - HloInstruction::CreateUnary(vshape, HloOpcode::kCeil, negate)); - - auto add1 = builder.AddInstruction( - HloInstruction::CreateBinary(vshape, HloOpcode::kMultiply, negate, ceil)); - auto add2 = builder.AddInstruction( - HloInstruction::CreateBinary(vshape, HloOpcode::kMultiply, ceil, negate)); - - // Tie together the two adds with a tuple to create a single root. - auto result = - builder.AddInstruction(HloInstruction::CreateTuple({add1, add2})); - - // Create computation and module. - auto module = CreateNewModule(); - module->AddEntryComputation(builder.Build()); - - // Run fusion. - CpuInstructionFusion fusion; - EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie()); - - auto fusion1 = result->operand(0); - auto fusion2 = result->operand(1); - EXPECT_EQ(HloOpcode::kFusion, fusion1->opcode()); - EXPECT_EQ(HloOpcode::kFusion, fusion2->opcode()); - - // Each fusion instruction should have 4 fused instruction inside: add, ceil, - // negate, and the fused parameter. - EXPECT_EQ(4, fusion1->fused_instruction_count()); - EXPECT_EQ(4, fusion2->fused_instruction_count()); - - // Each fusion instruction should have one parameter and the parameter should - // be the constant. - EXPECT_EQ(1, fusion1->operand_count()); - EXPECT_EQ(constant, fusion1->operand(0)); - EXPECT_EQ(1, fusion2->operand_count()); - EXPECT_EQ(constant, fusion2->operand(0)); -} - -TEST_F(CpuFusionTest, DoNotDuplicateExpensiveOps) { - // Verify that expensive operations will not be fused if the fusion results in - // duplication. 
Test code: - // - // constant = 42.0 - // exp1 = exp(constant) - // negate1 = negate(exp1) - // exp2 = exp(constant) - // negate2 = negate(exp2) - // tuple = tuple(negate1, negate2, exp2) - // - // exp1 should be fused down into negate1, but exp2 will not be fused into - // negate2 because this will result in duplication of the expensive exp - // computation. The duplication is caused by the other use of exp2 in the - // tuple. - auto builder = HloComputation::Builder(TestName()); - auto input_literal1 = Literal::CreateR1({1.0, 2.0, 3.0}); - auto input_literal2 = Literal::CreateR1({-2.0, -42.0, 2.0}); - auto constant = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(42.0))); - Shape shape = constant->shape(); - - auto exp1 = builder.AddInstruction( - HloInstruction::CreateUnary(shape, HloOpcode::kExp, constant)); - auto negate1 = builder.AddInstruction( - HloInstruction::CreateUnary(shape, HloOpcode::kNegate, exp1)); - - auto exp2 = builder.AddInstruction( - HloInstruction::CreateUnary(shape, HloOpcode::kExp, constant)); - auto negate2 = builder.AddInstruction( - HloInstruction::CreateUnary(shape, HloOpcode::kNegate, exp2)); - - auto tuple = builder.AddInstruction( - HloInstruction::CreateTuple({negate1, negate2, exp2})); - - auto module = CreateNewModule(); - module->AddEntryComputation(builder.Build()); - - CpuInstructionFusion fusion; - EXPECT_TRUE(fusion.Run(module.get()).ValueOrDie()); - - // The only fusion instruction should be operand 0 of the tuple (formerly - // negate1). - EXPECT_EQ(HloOpcode::kFusion, tuple->operand(0)->opcode()); - EXPECT_EQ(HloOpcode::kNegate, tuple->operand(1)->opcode()); - EXPECT_EQ(HloOpcode::kExp, tuple->operand(2)->opcode()); - - auto fusion_inst = tuple->operand(0); - // There should be three fused instructions: negate2, exp2, and the fused - // parameter. 
- EXPECT_EQ(3, fusion_inst->fused_instruction_count()); - EXPECT_EQ(1, fusion_inst->operand_count()); - EXPECT_EQ(constant, fusion_inst->operand(0)); -} - -} // namespace -} // namespace cpu -} // namespace xla diff --git a/tensorflow/compiler/xla/tests/cpu/cpu_intrinsic_test.cc b/tensorflow/compiler/xla/tests/cpu/cpu_intrinsic_test.cc deleted file mode 100644 index 15a8a44e4c..0000000000 --- a/tensorflow/compiler/xla/tests/cpu/cpu_intrinsic_test.cc +++ /dev/null @@ -1,150 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include -#include - -#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" -#include "tensorflow/compiler/xla/service/hlo_computation.h" -#include "tensorflow/compiler/xla/tests/cpu/cpu_codegen_test.h" -#include "tensorflow/core/lib/strings/strcat.h" -#include "tensorflow/core/platform/test.h" - -namespace xla { -namespace cpu { -namespace { - -const char* const kTriple_x86_64 = "x86_64-pc-linux"; -const char* const kTriple_android_arm = "armv7-none-android"; - -struct IntrinsicTestSpec { - HloOpcode opcode; - tensorflow::StringPiece triple; - tensorflow::StringPiece features; - tensorflow::StringPiece check_lines; -}; - -// Tests that unary functions get lowered using intrinsic calls. 
-class CpuUnaryIntrinsicTest - : public CpuCodegenTest, - public ::testing::WithParamInterface { - public: - static string Name(const ::testing::TestParamInfo& info) { - auto spec = info.param; - - string opcode = HloOpcodeString(spec.opcode); - opcode[0] = toupper(opcode[0]); - - string triple{spec.triple.data(), spec.triple.size()}; - if (triple == kTriple_x86_64) { - triple = "x86_64"; - } else if (triple == kTriple_android_arm) { - triple = "android_arm"; - } else { - triple = "Unknown"; - } - - string features{spec.features.data(), spec.features.size()}; - if (!features.empty()) { - std::replace_if(features.begin(), features.end(), - [](char c) { return c != '_' && !isalnum(c); }, '_'); - } else { - features = ""; - } - - return tensorflow::strings::StrCat(opcode.c_str(), "_On_", triple.c_str(), - features.empty() ? "" : "_With", - features.c_str()); - } -}; - -// Creates a module with a call to the unary op, and tests if the -// compiler replaced it with a call to the intrinsic. -TEST_P(CpuUnaryIntrinsicTest, DoIt) { - HloComputation::Builder builder(TestName()); - IntrinsicTestSpec spec = GetParam(); - - auto param_shape = ShapeUtil::MakeShape(F32, {1024}); - HloInstruction* param = builder.AddInstruction( - HloInstruction::CreateParameter(0, param_shape, "input")); - builder.AddInstruction( - HloInstruction::CreateUnary(param_shape, spec.opcode, param)); - std::unique_ptr computation = builder.Build(); - - string triple{spec.triple.data(), spec.triple.size()}; - string features{spec.features.data(), spec.features.size()}; - - CpuAotCompilationOptions options{ - /*triple=*/triple, /*cpu_name=*/"", /*features=*/features, - /*entry_point_name=*/"entry", - /*relocation_model=*/CpuAotCompilationOptions::RelocationModel::Static}; - - auto hlo_module = CreateNewModule(); - hlo_module->AddEntryComputation(std::move(computation)); - - string check_lines{spec.check_lines.data(), spec.check_lines.size()}; - - CompileAheadOfTimeAndVerifyIr(std::move(hlo_module), 
options, check_lines, - /*match_optimized_ir=*/true); -} - -IntrinsicTestSpec CpuUnaryIntrinsicTestCases[] = { - IntrinsicTestSpec{ - HloOpcode::kExp, kTriple_x86_64, "+sse4.1", - R"(CHECK: call fast <4 x float> @__xla_cpu_runtime_ExpV4F32SSE(<4 x float> %wide.load))"}, - - IntrinsicTestSpec{ - HloOpcode::kExp, kTriple_x86_64, "+avx", - R"(CHECK: call fast <8 x float> @__xla_cpu_runtime_ExpV8F32AVX(<8 x float> %wide.load))"}, - - IntrinsicTestSpec{ - HloOpcode::kExp, kTriple_android_arm, "+neon", - R"(CHECK: call fast <4 x float> @__xla_cpu_runtime_ExpV4F32NEON(<4 x float> %wide.load))"}, - - IntrinsicTestSpec{ - HloOpcode::kLog, kTriple_x86_64, "+sse4.1", - R"(CHECK: call fast <4 x float> @__xla_cpu_runtime_LogV4F32SSE(<4 x float> %wide.load))"}, - - IntrinsicTestSpec{ - HloOpcode::kLog, kTriple_x86_64, "+avx", - R"(CHECK: call fast <8 x float> @__xla_cpu_runtime_LogV8F32AVX(<8 x float> %wide.load))"}, - - IntrinsicTestSpec{ - HloOpcode::kLog, kTriple_android_arm, "+neon", - R"(CHECK: call fast <4 x float> @__xla_cpu_runtime_LogV4F32NEON(<4 x float> %wide.load))"}, - - // Tanh is inlined, so we match a line from it instead of a function call. - - IntrinsicTestSpec{ - HloOpcode::kTanh, kTriple_x86_64, "", - R"(CHECK: fcmp fast uge <4 x float> %wide.load, )"}, - - IntrinsicTestSpec{ - HloOpcode::kTanh, kTriple_x86_64, "+avx", - R"(CHECK: fcmp fast uge <8 x float> %wide.load, )"}, - - IntrinsicTestSpec{ - HloOpcode::kTanh, kTriple_android_arm, "", - R"(CHECK: fcmp fast uge <4 x float> %wide.load, )"}}; - -INSTANTIATE_TEST_CASE_P(CpuUnaryIntrinsicTestInstantiation, - CpuUnaryIntrinsicTest, - ::testing::ValuesIn(CpuUnaryIntrinsicTestCases), - CpuUnaryIntrinsicTest::Name); - -} // namespace -} // namespace cpu -} // namespace xla -- GitLab From ff8f26d5968f01016428e1755adf514362bf880b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 10 Oct 2017 21:06:53 -0700 Subject: [PATCH 256/909] Improves "SparseTensor labels are not supported" error message. 
PiperOrigin-RevId: 171775503 --- tensorflow/python/estimator/canned/head.py | 26 +++++++++++++--------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index 43baaece4b..e53626fc54 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -188,9 +188,6 @@ class _Head(object): def _maybe_expand_dim(tensor): """Expand the dim of `tensor` with static rank 1.""" with ops.name_scope(None, 'maybe_expand_dim', (tensor,)): - tensor = sparse_tensor.convert_to_tensor_or_sparse_tensor(tensor) - if isinstance(tensor, sparse_tensor.SparseTensor): - raise ValueError('SparseTensor labels are not supported.') static_shape = tensor.shape if static_shape is None: return tensor @@ -199,12 +196,21 @@ def _maybe_expand_dim(tensor): else tensor) -def _check_labels(labels, expected_labels_dimension): - """Check labels type and shape.""" +def _check_and_reshape_dense_labels(labels, expected_labels_dimension): + """Checks dense labels type and shape and reshapes to 2D Tensor.""" with ops.name_scope(None, 'labels', (labels,)) as scope: labels = sparse_tensor.convert_to_tensor_or_sparse_tensor(labels) if isinstance(labels, sparse_tensor.SparseTensor): - raise ValueError('SparseTensor labels are not supported.') + raise ValueError( + 'SparseTensor labels are not supported. ' + 'labels must be a Tensor of shape [batch_size, %s]. ' + 'Suggested Fix (1): Check the label feature in your data. ' + 'Each example must contain %s value(s). If not, your choice of label ' + 'was probably incorrect. ' + 'Suggested Fix (2): In your input_fn, use ' + 'tf.sparse_tensor_to_dense() to turn labels into a Tensor.' 
+ '' % (expected_labels_dimension, expected_labels_dimension)) + labels = _maybe_expand_dim(labels) labels_shape = array_ops.shape(labels) err_msg = 'labels shape must be [batch_size, {}]'.format( expected_labels_dimension) @@ -430,7 +436,7 @@ class _MultiClassHeadWithSoftmaxCrossEntropyLoss(_Head): def create_loss(self, features, mode, logits, labels): """See `Head`.""" del mode, features # Unused for this head. - label_ids = self._label_ids(_check_labels(_maybe_expand_dim(labels), 1)) + label_ids = self._label_ids(_check_and_reshape_dense_labels(labels, 1)) unweighted_loss = losses.sparse_softmax_cross_entropy( labels=label_ids, logits=logits, reduction=losses.Reduction.NONE) # Restore the squeezed dim, so unweighted_loss matches the weights shape. @@ -674,7 +680,7 @@ class _BinaryLogisticHeadWithSigmoidCrossEntropyLoss(_Head): def create_loss(self, features, mode, logits, labels): """See `Head`.""" del mode, features # Unused for this head. - labels = _check_labels(_maybe_expand_dim(labels), self.logits_dimension) + labels = _check_and_reshape_dense_labels(labels, self.logits_dimension) if self._label_vocabulary is not None: labels = lookup_ops.index_table_from_tensor( vocabulary_list=tuple(self._label_vocabulary), @@ -823,8 +829,8 @@ class _RegressionHeadWithMeanSquaredErrorLoss(_Head): def create_loss(self, features, mode, logits, labels): """See `Head`.""" del mode, features # Unused for this head. 
- labels = _check_labels( - _maybe_expand_dim(math_ops.to_float(labels)), self._logits_dimension) + labels = _check_and_reshape_dense_labels( + math_ops.to_float(labels), self._logits_dimension) return LossAndLabels( unweighted_loss=losses.mean_squared_error( labels=labels, predictions=logits, reduction=losses.Reduction.NONE), -- GitLab From 1ad5e692e2fc218ca0b2a9a461c19762fdc9674b Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Tue, 10 Oct 2017 23:50:29 -0700 Subject: [PATCH 257/909] Added support for Python3 Raspberry Pi CI builds (#13612) * Fix for RTLD_GLOBAL breakage of Pi builds, and removed Eigen version change for Pi that's no longer needed * Fixed Pi Zero OpenBLAS build problems and tidied up directories used * More robust checks in Pi build script * Changed output directory for Pi CI build to fix permissions problem * Added support for Python3 Raspberry Pi CI builds * Tidied up comments and updated Python tool template * Cleaned up Python include path logic --- tensorflow/tools/ci_build/Dockerfile.pi | 3 ++ .../tools/ci_build/Dockerfile.pi-python3 | 23 +++++++++++++++ .../install/install_pi_python3_toolchain.sh | 29 +++++++++++++++++++ .../ci_build/install/install_pi_toolchain.sh | 2 +- third_party/toolchains/cpus/arm/CROSSTOOL.tpl | 2 +- .../cpus/arm/arm_compiler_configure.bzl | 11 +++++++ 6 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 tensorflow/tools/ci_build/Dockerfile.pi-python3 create mode 100755 tensorflow/tools/ci_build/install/install_pi_python3_toolchain.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.pi b/tensorflow/tools/ci_build/Dockerfile.pi index 9d12ededb8..2fddd6a2c0 100644 --- a/tensorflow/tools/ci_build/Dockerfile.pi +++ b/tensorflow/tools/ci_build/Dockerfile.pi @@ -14,6 +14,9 @@ RUN /install/install_proto3.sh RUN /install/install_buildifier.sh RUN /install/install_auditwheel.sh RUN /install/install_golang.sh + +# The following line installs the Python cross-compilation toolchain. 
All the +# preceding dependencies should be kept in sync with the main CPU docker file. RUN /install/install_pi_toolchain.sh # Set up the master bazelrc configuration file. diff --git a/tensorflow/tools/ci_build/Dockerfile.pi-python3 b/tensorflow/tools/ci_build/Dockerfile.pi-python3 new file mode 100644 index 0000000000..18b131ea19 --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.pi-python3 @@ -0,0 +1,23 @@ +FROM ubuntu:14.04 + +MAINTAINER Jan Prach + +# Copy and run the install scripts. +COPY install/*.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh +RUN add-apt-repository -y ppa:openjdk-r/ppa && \ + add-apt-repository -y ppa:george-edison55/cmake-3.x +RUN /install/install_deb_packages.sh +RUN /install/install_pip_packages.sh +RUN /install/install_bazel.sh +RUN /install/install_proto3.sh +RUN /install/install_buildifier.sh +RUN /install/install_auditwheel.sh +RUN /install/install_golang.sh + +# The following line installs the Python cross-compilation toolchain. All the +# preceding dependencies should be kept in sync with the main CPU docker file. +RUN /install/install_pi_python3_toolchain.sh + +# Set up the master bazelrc configuration file. +COPY install/.bazelrc /etc/bazel.bazelrc diff --git a/tensorflow/tools/ci_build/install/install_pi_python3_toolchain.sh b/tensorflow/tools/ci_build/install/install_pi_python3_toolchain.sh new file mode 100755 index 0000000000..9d8e3df3b5 --- /dev/null +++ b/tensorflow/tools/ci_build/install/install_pi_python3_toolchain.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +dpkg --add-architecture armhf +echo 'deb [arch=armhf] http://ports.ubuntu.com/ trusty main restricted universe multiverse' >> /etc/apt/sources.list.d/armhf.list +echo 'deb [arch=armhf] http://ports.ubuntu.com/ trusty-updates main restricted universe multiverse' >> /etc/apt/sources.list.d/armhf.list +echo 'deb [arch=armhf] http://ports.ubuntu.com/ trusty-security main restricted universe multiverse' >> /etc/apt/sources.list.d/armhf.list +echo 'deb [arch=armhf] http://ports.ubuntu.com/ trusty-backports main restricted universe multiverse' >> /etc/apt/sources.list.d/armhf.list +sed -i 's#deb http://archive.ubuntu.com/ubuntu/#deb [arch=amd64] http://archive.ubuntu.com/ubuntu/#g' /etc/apt/sources.list +apt-get update +apt-get install -y libpython3-all-dev:armhf +echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | sudo tee /etc/apt/sources.list.d/bazel.list +curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add - +apt-get update +rm -rf /usr/local/bin/bazel +apt-get install -y bazel python3 python3-numpy python3-dev python3-pip diff --git a/tensorflow/tools/ci_build/install/install_pi_toolchain.sh b/tensorflow/tools/ci_build/install/install_pi_toolchain.sh index ef30ba58c2..03c43cc838 100755 --- a/tensorflow/tools/ci_build/install/install_pi_toolchain.sh +++ b/tensorflow/tools/ci_build/install/install_pi_toolchain.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/third_party/toolchains/cpus/arm/CROSSTOOL.tpl b/third_party/toolchains/cpus/arm/CROSSTOOL.tpl index ad7f5596d0..f0e17d1fe0 100644 --- a/third_party/toolchains/cpus/arm/CROSSTOOL.tpl +++ b/third_party/toolchains/cpus/arm/CROSSTOOL.tpl @@ -87,7 +87,7 @@ toolchain { cxx_flag: "-isystem" cxx_flag: "/usr/include/arm-linux-gnueabihf" cxx_flag: "-isystem" - cxx_flag: "/usr/include/python2.7" + cxx_flag: "%{PYTHON_INCLUDE_PATH}%" cxx_flag: "-isystem" cxx_flag: "/usr/include/" linker_flag: "-lstdc++" diff --git a/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl b/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl index 5eb3b7bb1c..ab6eac115c 100644 --- a/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl +++ b/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl @@ -11,9 +11,20 @@ def _tpl(repository_ctx, tpl, substitutions={}, out=None): def _arm_compiler_configure_impl(repository_ctx): + # We need to find a cross-compilation include directory for Python, so look + # for an environment variable. Be warned, this crosstool template is only + # regenerated on the first run of Bazel, so if you change the variable after + # it may not be reflected in later builds. Doing a shutdown and clean of Bazel + # doesn't fix this, you'll need to delete the generated file at something like: + # external/local_config_arm_compiler/CROSSTOOL in your Bazel install. 
+ if "CROSSTOOL_PYTHON_INCLUDE_PATH" in repository_ctx.os.environ: + python_include_path = repository_ctx.os.environ["CROSSTOOL_PYTHON_INCLUDE_PATH"] + else: + python_include_path = "/usr/include/python2.7" _tpl(repository_ctx, "CROSSTOOL", { "%{ARM_COMPILER_PATH}%": str(repository_ctx.path( repository_ctx.attr.remote_config_repo)), + "%{PYTHON_INCLUDE_PATH}%": python_include_path, }) repository_ctx.symlink(repository_ctx.attr.build_file, "BUILD") -- GitLab From 0ed44c0144c9dfae8a53dd3b4f943f23c5a57e37 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 11 Oct 2017 00:22:33 -0700 Subject: [PATCH 258/909] TensorFlow base ApiDefs and tests to make sure they are kept in sync. PiperOrigin-RevId: 171788007 --- tensorflow/core/BUILD | 30 + tensorflow/core/api_def/api_test.cc | 206 ++ .../core/api_def/base_api/api_def_A.pbtxt | 670 +++++ .../core/api_def/base_api/api_def_B.pbtxt | 448 +++ .../core/api_def/base_api/api_def_C.pbtxt | 513 ++++ .../core/api_def/base_api/api_def_D.pbtxt | 790 +++++ .../core/api_def/base_api/api_def_E.pbtxt | 261 ++ .../core/api_def/base_api/api_def_F.pbtxt | 411 +++ .../core/api_def/base_api/api_def_G.pbtxt | 257 ++ .../core/api_def/base_api/api_def_H.pbtxt | 52 + .../core/api_def/base_api/api_def_I.pbtxt | 518 ++++ .../core/api_def/base_api/api_def_L.pbtxt | 392 +++ .../core/api_def/base_api/api_def_M.pbtxt | 749 +++++ .../core/api_def/base_api/api_def_N.pbtxt | 94 + .../core/api_def/base_api/api_def_O.pbtxt | 195 ++ .../core/api_def/base_api/api_def_P.pbtxt | 431 +++ .../core/api_def/base_api/api_def_Q.pbtxt | 609 ++++ .../core/api_def/base_api/api_def_R.pbtxt | 1392 +++++++++ .../core/api_def/base_api/api_def_S.pbtxt | 2678 +++++++++++++++++ .../core/api_def/base_api/api_def_T.pbtxt | 619 ++++ .../core/api_def/base_api/api_def_U.pbtxt | 150 + .../core/api_def/base_api/api_def_V.pbtxt | 19 + .../core/api_def/base_api/api_def_W.pbtxt | 72 + .../core/api_def/base_api/api_def_Z.pbtxt | 27 + tensorflow/core/api_def/update_api_def.sh | 28 + 
tensorflow/core/framework/op.h | 3 +- 26 files changed, 11613 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/api_def/api_test.cc create mode 100644 tensorflow/core/api_def/base_api/api_def_A.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_B.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_C.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_D.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_E.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_F.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_G.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_H.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_I.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_L.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_M.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_N.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_O.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_P.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_Q.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_R.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_S.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_T.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_U.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_V.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_W.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_Z.pbtxt create mode 100755 tensorflow/core/api_def/update_api_def.sh diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index f3e43dd552..74aecbc1f2 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3316,6 +3316,36 @@ tf_cc_test( ], ) +filegroup( + name = "base_api_def", + data = glob(["api_def/base_api/*"]), +) + +tf_cc_test( + name = 
"api_test", + srcs = ["api_def/api_test.cc"], + data = [ + ":base_api_def", + "//tensorflow/cc:ops/op_gen_overrides.pbtxt", + ], + tags = [ + "manual", + "notap", + ], + deps = [ + ":framework", + ":framework_internal", + ":lib", + ":lib_internal", + ":lib_test_internal", + ":op_gen_lib", + ":op_gen_overrides_proto_cc", + ":ops", + ":protos_all_cc", + ":test", + ], +) + tf_cc_test_gpu( name = "gpu_tracer_test", size = "small", diff --git a/tensorflow/core/api_def/api_test.cc b/tensorflow/core/api_def/api_test.cc new file mode 100644 index 0000000000..ceeb172fa0 --- /dev/null +++ b/tensorflow/core/api_def/api_test.cc @@ -0,0 +1,206 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Test that verifies tensorflow/core/api_def/base_api/api_def*.pbtxt files +// are correct. If api_def*.pbtxt do not match expected contents, run +// tensorflow/core/api_def/base_api/update_api_def.sh script to update them. 
+ +#include +#include +#include +#include +#include + +#include "tensorflow/core/framework/api_def.pb.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" +#include "tensorflow/core/framework/op_gen_overrides.pb.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/command_line_flags.h" + +namespace tensorflow { +namespace { +constexpr char kDefaultApiDefDir[] = + "tensorflow/core/api_def/base_api"; +constexpr char kOverridesFilePath[] = + "tensorflow/cc/ops/op_gen_overrides.pbtxt"; +constexpr char kApiDefFileFormat[] = "api_def_%c.pbtxt"; +constexpr char kAlphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + +// Get map from first character to ApiDefs for ops +// that start with that character. 
+std::unordered_map GenerateApiDef( + const OpList& ops, const OpGenOverrides& overrides) { + std::unordered_map name_to_override; + for (const auto& op_override : overrides.op()) { + name_to_override[op_override.name()] = op_override; + } + + std::unordered_map api_defs_map; + + for (const auto& op : ops.op()) { + CHECK(!op.name().empty()) + << "Encountered empty op name: %s" << op.DebugString(); + const char file_id = toupper(op.name()[0]); + CHECK(isalpha(file_id)) << "Unexpected op name: " << op.name(); + ApiDef* api_def = api_defs_map[file_id].add_op(); + api_def->set_graph_op_name(op.name()); + + if (name_to_override.find(op.name()) != name_to_override.end()) { + const auto& op_override = name_to_override[op.name()]; + // Set visibility + if (op_override.skip()) { + api_def->set_visibility(ApiDef_Visibility_SKIP); + } else if (op_override.hide()) { + api_def->set_visibility(ApiDef_Visibility_HIDDEN); + } + // Add endpoints + if (!op_override.rename_to().empty()) { + auto* endpoint = api_def->add_endpoint(); + endpoint->set_name(op_override.rename_to()); + } else { + auto* endpoint = api_def->add_endpoint(); + endpoint->set_name(op.name()); + } + for (auto& alias : op_override.alias()) { + auto* endpoint = api_def->add_endpoint(); + endpoint->set_name(alias); + } + // Add attributes + for (auto& attr : op.attr()) { + auto* api_def_attr = api_def->add_attr(); + api_def_attr->set_name(attr.name()); + for (auto& attr_override : op_override.attr_default()) { + if (attr.name() == attr_override.name()) { + *(api_def_attr->mutable_default_value()) = attr_override.value(); + } + } + for (auto& attr_rename : op_override.attr_rename()) { + if (attr.name() == attr_rename.from()) { + api_def_attr->set_rename_to(attr_rename.to()); + } + } + } + } else { + auto* endpoint = api_def->add_endpoint(); + endpoint->set_name(op.name()); + } + // Add docs + api_def->set_summary(op.summary()); + api_def->set_description(op.description()); + } + return api_defs_map; +} + +// Reads 
golden api defs file with the given suffix. +string GetGoldenApiDefsStr(Env* env, const string& api_files_dir, char suffix) { + string file_path = strings::Printf( + io::JoinPath(api_files_dir, kApiDefFileFormat).c_str(), suffix); + if (env->FileExists(file_path).ok()) { + string file_contents; + TF_EXPECT_OK(ReadFileToString(env, file_path, &file_contents)); + return file_contents; + } + return ""; +} + +void RunApiTest(bool update_api_def, const string& api_files_dir) { + // Read C++ overrides file + string overrides_file_contents; + Env* env = Env::Default(); + TF_EXPECT_OK( + ReadFileToString(env, kOverridesFilePath, &overrides_file_contents)); + + // Read all ops + OpList ops; + OpRegistry::Global()->Export(false, &ops); + const std::vector multi_line_fields = {"description"}; + + // Get expected ApiDefs + OpGenOverrides overrides; + auto new_api_defs_map = GenerateApiDef(ops, overrides); + + bool updated_at_least_one_file = false; + + for (char c : kAlphabet) { + string golden_api_defs_str = GetGoldenApiDefsStr(env, api_files_dir, c); + string new_api_defs_str = new_api_defs_map[c].DebugString(); + new_api_defs_str = PBTxtToMultiline(new_api_defs_str, multi_line_fields); + if (golden_api_defs_str == new_api_defs_str) { + continue; + } + if (update_api_def) { + string output_file_path = + io::JoinPath(api_files_dir, strings::Printf(kApiDefFileFormat, c)); + if (new_api_defs_str.empty()) { + std::cout << "Deleting " << output_file_path << "..." << std::endl; + TF_EXPECT_OK(env->DeleteFile(output_file_path)); + } else { + std::cout << "Updating " << output_file_path << "..." 
<< std::endl; + TF_EXPECT_OK( + WriteStringToFile(env, output_file_path, new_api_defs_str)); + } + updated_at_least_one_file = true; + } else { + EXPECT_EQ(golden_api_defs_str, new_api_defs_str) + << "To update golden API files, run " + << "tensorflow/core/api_def/update_api_def.sh."; + } + } + + if (update_api_def && !updated_at_least_one_file) { + std::cout << "Api def files are already up to date." << std::endl; + } +} + +TEST(ApiTest, GenerateBaseAPIDef) { RunApiTest(false, kDefaultApiDefDir); } +} // namespace +} // namespace tensorflow + +int main(int argc, char** argv) { + bool update_api_def = false; + tensorflow::string api_files_dir = tensorflow::kDefaultApiDefDir; + std::vector flag_list = { + tensorflow::Flag( + "update_api_def", &update_api_def, + "Whether to update tensorflow/core/api_def/base_api/api_def*.pbtxt " + "files if they differ from expected API."), + tensorflow::Flag("api_def_dir", &api_files_dir, + "Base directory of api_def*.pbtxt files.")}; + std::string usage = tensorflow::Flags::Usage(argv[0], flag_list); + bool parsed_values_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); + if (!parsed_values_ok) { + std::cerr << usage << std::endl; + return 2; + } + if (update_api_def) { + tensorflow::port::InitMain(argv[0], &argc, &argv); + tensorflow::RunApiTest(update_api_def, api_files_dir); + return 0; + } + testing::InitGoogleTest(&argc, argv); + // Run tests + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/core/api_def/base_api/api_def_A.pbtxt b/tensorflow/core/api_def/base_api/api_def_A.pbtxt new file mode 100644 index 0000000000..8193d1bc62 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_A.pbtxt @@ -0,0 +1,670 @@ +op { + graph_op_name: "Abort" + endpoint { + name: "Abort" + } + summary: "Raise a exception to abort the process when called." + description: <= 2." +} +op { + graph_op_name: "AdjustContrastv2" + endpoint { + name: "AdjustContrastv2" + } + summary: "Adjust the contrast of one or more images." 
+ description: < [2.0132, 1.056] +``` + +@compatibility(numpy) +Equivalent to np.angle. +@end_compatibility +END +} +op { + graph_op_name: "Any" + endpoint { + name: "Any" + } + summary: "Computes the \"logical or\" of elements across dimensions of a tensor." + description: < l1 else 0.0 +accum = accum_new +END +} +op { + graph_op_name: "ApplyFtrlV2" + endpoint { + name: "ApplyFtrlV2" + } + summary: "Update \'*var\' according to the Ftrl-proximal scheme." + description: < l1 else 0.0 +accum = accum_new +END +} +op { + graph_op_name: "ApplyGradientDescent" + endpoint { + name: "ApplyGradientDescent" + } + summary: "Update \'*var\' by subtracting \'alpha\' * \'delta\' from it." +} +op { + graph_op_name: "ApplyMomentum" + endpoint { + name: "ApplyMomentum" + } + summary: "Update \'*var\' according to the momentum scheme. Set use_nesterov = True if you" + description: < threshold`) +or and `false` otherwise. + +This operation is useful for Locality-Sensitive-Hashing (LSH) and other +algorithms that use hashing approximations of cosine and `L2` distances; +codes can be generated from an input via: + +```python +codebook_size = 50 +codebook_bits = codebook_size * 32 +codebook = tf.get_variable('codebook', [x.shape[-1].value, codebook_bits], + dtype=x.dtype, + initializer=tf.orthogonal_initializer()) +codes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.) +codes = tf.bitcast(codes, tf.int32) # go from uint8 to int32 +# now codes has shape x.shape[:-1] + [codebook_size] +``` + +**NOTE**: Currently, the innermost dimension of the tensor must be divisible +by 8. + +Given an `input` shaped `[s0, s1, ..., s_n]`, the output is +a `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`. +END +} +op { + graph_op_name: "Complex" + endpoint { + name: "Complex" + } + summary: "Converts two real numbers to a complex number." 
+ description: < [[2.25 + 4.75j], [3.25 + 5.75j]] +``` +END +} +op { + graph_op_name: "ComplexAbs" + endpoint { + name: "ComplexAbs" + } + summary: "Computes the complex absolute value of a tensor." + description: < [0, 0, 0], [0, 2, 0], [0, 5, 0] +``` + +This is typically used by gradient computations for a concat operation. +END +} +op { + graph_op_name: "ConcatV2" + endpoint { + name: "ConcatV2" + } + summary: "Concatenates tensors along one dimension." +} +op { + graph_op_name: "ConcatenateDataset" + endpoint { + name: "ConcatenateDataset" + } + summary: "Creates a dataset that concatenates `input_dataset` with `another_dataset`." +} +op { + graph_op_name: "ConditionalAccumulator" + endpoint { + name: "ConditionalAccumulator" + } + summary: "A conditional accumulator for aggregating gradients." + description: < [-2.25 - 4.75j, 3.25 - 5.75j] +``` +END +} +op { + graph_op_name: "Const" + endpoint { + name: "Const" + } + summary: "Returns a constant tensor." +} +op { + graph_op_name: "ControlTrigger" + endpoint { + name: "ControlTrigger" + } + summary: "Does nothing. Serves as a control trigger for scheduling." + description: < [a, a * b, a * b * c] +``` + +By setting the `exclusive` kwarg to `True`, an exclusive cumprod is +performed instead: + +```python +tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] +``` + +By setting the `reverse` kwarg to `True`, the cumprod is performed in the +opposite direction: + +```python +tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] +``` + +This is more efficient than using separate `tf.reverse` ops. + +The `reverse` and `exclusive` kwargs can also be combined: + +```python +tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] +``` +END +} +op { + graph_op_name: "Cumsum" + endpoint { + name: "Cumsum" + } + summary: "Compute the cumulative sum of the tensor `x` along `axis`." 
+ description: < [a, a + b, a + b + c] +``` + +By setting the `exclusive` kwarg to `True`, an exclusive cumsum is +performed instead: + +```python +tf.cumsum([a, b, c], exclusive=True) # => [0, a, a + b] +``` + +By setting the `reverse` kwarg to `True`, the cumsum is performed in the +opposite direction: + +```python +tf.cumsum([a, b, c], reverse=True) # => [a + b + c, b + c, c] +``` + +This is more efficient than using separate `tf.reverse` ops. + +The `reverse` and `exclusive` kwargs can also be combined: + +```python +tf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0] +``` +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_D.pbtxt b/tensorflow/core/api_def/base_api/api_def_D.pbtxt new file mode 100644 index 0000000000..ff8a7223c7 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_D.pbtxt @@ -0,0 +1,790 @@ +op { + graph_op_name: "DebugGradientIdentity" + endpoint { + name: "DebugGradientIdentity" + } + summary: "Identity op for gradient debugging." + description: <