From 00343a48d39d9ff74ceb662c5140048295f2610a Mon Sep 17 00:00:00 2001 From: TTrapper Date: Mon, 2 Oct 2017 17:51:09 -0300 Subject: [PATCH 0001/1225] sampled version of sparse_softmax_cross_entropy_with_logits --- tensorflow/python/ops/nn.py | 1 + tensorflow/python/ops/nn_impl.py | 98 ++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index a80662c8b5..f7edace5b1 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -90,6 +90,7 @@ See the @{$python/nn} guide. @@in_top_k @@nce_loss @@sampled_softmax_loss +@@sampled_sparse_softmax_loss @@uniform_candidate_sampler @@log_uniform_candidate_sampler @@learned_unigram_candidate_sampler diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index db8e92831e..b2b57a055f 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -1258,3 +1258,101 @@ def sampled_softmax_loss(weights, labels=labels, logits=logits) # sampled_losses is a [batch_size] tensor. return sampled_losses + + +def sampled_sparse_softmax_loss(weights, + biases, + labels, + inputs, + num_sampled, + num_classes, + sampled_values=None, + remove_accidental_hits=True, + partition_strategy="mod", + name="sampled_sparse_softmax_loss"): + """Computes and returns the sampled sparse softmax training loss. + + This is a faster way to train a softmax classifier over a huge number of + classes. + + This operation is for training only. It is generally an underestimate of + the full softmax loss. + + A common use case is to use this method for training, and calculate the full + softmax loss for evaluation or inference. 
In this case, you must set + `partition_strategy="div"` for the two losses to be consistent, as in the + following example: + + ```python + if mode == "train": + loss = tf.nn.sampled_sparse_softmax_loss( + weights=weights, + biases=biases, + labels=labels, + inputs=inputs, + ..., + partition_strategy="div") + elif mode == "eval": + logits = tf.matmul(inputs, tf.transpose(weights)) + logits = tf.nn.bias_add(logits, biases) + labels_one_hot = tf.one_hot(labels, n_classes) + loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels_one_hot, + logits=logits) + ``` + + See our [Candidate Sampling Algorithms Reference] + (https://www.tensorflow.org/extras/candidate_sampling.pdf) + + Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007) + ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math. + + Args: + weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor` + objects whose concatenation along dimension 0 has shape + [num_classes, dim]. The (possibly-sharded) class embeddings. + biases: A `Tensor` of shape `[num_classes]`. The class biases. + labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`. + The index of the single target class for each row of logits. Note that + this format differs from the `labels` argument of + `nn.sparse_softmax_cross_entropy_with_logits`. + inputs: A `Tensor` of shape `[batch_size, dim]`. The forward + activations of the input network. + num_sampled: An `int`. The number of classes to randomly sample per batch. + num_classes: An `int`. The number of possible classes. + sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`, + `sampled_expected_count`) returned by a `*_candidate_sampler` function. + (if None, we default to `log_uniform_candidate_sampler`) + remove_accidental_hits: A `bool`. whether to remove "accidental hits" + where a sampled class equals one of the target classes. Default is + True. 
+ partition_strategy: A string specifying the partitioning strategy, relevant + if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. + Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. + name: A name for the operation (optional). + + Returns: + A `batch_size` 1-D tensor of per-example sampled softmax losses. + + """ + logits, labels = _compute_sampled_logits( + weights=weights, + biases=biases, + labels=labels, + inputs=inputs, + num_sampled=num_sampled, + num_classes=num_classes, + num_true=1, + sampled_values=sampled_values, + subtract_log_q=True, + remove_accidental_hits=remove_accidental_hits, + partition_strategy=partition_strategy, + name=name) + + # labels returned by _compute_sampled_logits are one_hot. Convert to indices. + labels = array_ops.reshape(math_ops.argmax(labels, axis=1), [-1]) + + sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=logits) + # sampled_losses is a [batch_size] tensor. + return sampled_losses -- GitLab From 499376eb38b6b5b991e330d87c91d879a6f7bbbe Mon Sep 17 00:00:00 2001 From: Daniyar Date: Mon, 2 Oct 2017 20:58:00 +0100 Subject: [PATCH 0002/1225] unpack for int64 tensors on gpu --- tensorflow/core/kernels/unpack_op.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc index 7fd1def1fe..7ece912557 100644 --- a/tensorflow/core/kernels/unpack_op.cc +++ b/tensorflow/core/kernels/unpack_op.cc @@ -153,6 +153,12 @@ REGISTER_KERNEL_BUILDER(Name("Unpack") .HostMemory("output") .TypeConstraint("T"), UnpackOp); +REGISTER_KERNEL_BUILDER(Name("Unpack") + .Device(DEVICE_GPU) + .HostMemory("value") + .HostMemory("output") + .TypeConstraint("T"), + UnpackOp); #endif // GOOGLE_CUDA @@ -170,6 +176,12 @@ REGISTER_KERNEL_BUILDER(Name("Unpack") .HostMemory("output") .TypeConstraint("T"), UnpackOp); +REGISTER_KERNEL_BUILDER(Name("Unpack") + .Device(DEVICE_SYCL) + .HostMemory("value") + 
.HostMemory("output") + .TypeConstraint("T"), + UnpackOp); #undef REGISTER_SYCL #endif // TENSORFLOW_USE_SYCL -- GitLab From 7fe8a6decd3b1c077de5a3cdedff198195b16ee1 Mon Sep 17 00:00:00 2001 From: Daniyar Date: Thu, 5 Oct 2017 14:34:12 +0100 Subject: [PATCH 0003/1225] unstack op tests for dtypes --- .../python/kernel_tests/unstack_op_test.py | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/kernel_tests/unstack_op_test.py b/tensorflow/python/kernel_tests/unstack_op_test.py index c2dcff978a..d937108599 100644 --- a/tensorflow/python/kernel_tests/unstack_op_test.py +++ b/tensorflow/python/kernel_tests/unstack_op_test.py @@ -22,6 +22,7 @@ import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import constant_op +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.platform import test @@ -42,15 +43,33 @@ class UnstackOpTest(test.TestCase): np.random.seed(7) with self.test_session(use_gpu=True): for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2): - data = np.random.randn(*shape) - # Convert data to a single tensorflow tensor - x = constant_op.constant(data) - # Unpack into a list of tensors - cs = array_ops.unstack(x, num=shape[0]) - self.assertEqual(type(cs), list) - self.assertEqual(len(cs), shape[0]) - cs = [c.eval() for c in cs] - self.assertAllEqual(cs, data) + for dtype in [np.bool, np.float16, np.float32, np.float64, np.int32, np.int64]: + data = np.random.randn(*shape).astype(dtype) + # Convert data to a single tensorflow tensor + x = constant_op.constant(data) + # Unpack into a list of tensors + cs = array_ops.unstack(x, num=shape[0]) + self.assertEqual(type(cs), list) + self.assertEqual(len(cs), shape[0]) + cs = [c.eval() for c in cs] + self.assertAllEqual(cs, data) + + def testSimpleGpu(self): + if not 
test_util.is_gpu_available(): + self.skipTest("No GPU available") + np.random.seed(7) + with self.test_session(use_gpu=True, force_gpu=True): + for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2): + for dtype in [np.float16, np.float32, np.float64, np.int32, np.int64]: + data = np.random.randn(*shape).astype(dtype) + # Convert data to a single tensorflow tensor + x = constant_op.constant(data) + # Unpack into a list of tensors + cs = array_ops.unstack(x, num=shape[0]) + self.assertEqual(type(cs), list) + self.assertEqual(len(cs), shape[0]) + cs = [c.eval() for c in cs] + self.assertAllEqual(cs, data) def testGradientsAxis0(self): for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2): -- GitLab From 03233a04cf07d639d8d2b5f3fbcab479b267ac4e Mon Sep 17 00:00:00 2001 From: TTrapper Date: Fri, 6 Oct 2017 00:21:08 -0300 Subject: [PATCH 0004/1225] Adressed reviewer comments: moved to contrib, fixed erroneous doc, modified _compute_sampled_logits to optionally return target indices --- tensorflow/contrib/nn/__init__.py | 1 + .../contrib/nn/python/ops/sampling_ops.py | 97 +++++++++++++ tensorflow/python/ops/nn.py | 1 - tensorflow/python/ops/nn_impl.py | 128 +++--------------- 4 files changed, 120 insertions(+), 107 deletions(-) diff --git a/tensorflow/contrib/nn/__init__.py b/tensorflow/contrib/nn/__init__.py index be0957f473..89b70ddfc2 100644 --- a/tensorflow/contrib/nn/__init__.py +++ b/tensorflow/contrib/nn/__init__.py @@ -19,6 +19,7 @@ @@deprecated_flipped_sparse_softmax_cross_entropy_with_logits @@deprecated_flipped_sigmoid_cross_entropy_with_logits @@rank_sampled_softmax_loss +@@sampled_sparse_softmax_loss """ from __future__ import absolute_import diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py index 2ae529e015..b26da52f01 100644 --- a/tensorflow/contrib/nn/python/ops/sampling_ops.py +++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py @@ -24,6 +24,8 @@ from tensorflow.python.ops import array_ops from 
tensorflow.python.ops import embedding_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_impl +from tensorflow.python.ops import nn_ops def _rank_resample(weights, biases, inputs, sampled_values, num_resampled, @@ -240,3 +242,98 @@ def rank_sampled_softmax_loss(weights, remove_accidental_hits=remove_accidental_hits, partition_strategy=partition_strategy, name=name) + + +def sampled_sparse_softmax_loss(weights, + biases, + labels, + inputs, + num_sampled, + num_classes, + sampled_values=None, + remove_accidental_hits=True, + partition_strategy="mod", + name="sampled_sparse_softmax_loss"): + """Computes and returns the sampled sparse softmax training loss. + + This is a faster way to train a softmax classifier over a huge number of + classes. + + This operation is for training only. It is generally an underestimate of + the full softmax loss. + + A common use case is to use this method for training, and calculate the full + softmax loss for evaluation or inference. In this case, you must set + `partition_strategy="div"` for the two losses to be consistent, as in the + following example: + + ```python + if mode == "train": + loss = tf.nn.sampled_sparse_softmax_loss( + weights=weights, + biases=biases, + labels=labels, + inputs=inputs, + ..., + partition_strategy="div") + elif mode == "eval": + logits = tf.matmul(inputs, tf.transpose(weights)) + logits = tf.nn.bias_add(logits, biases) + loss = tf.nn.sparse_softmax_cross_entropy_with_logits( + labels=tf.squeeze(labels), + logits=logits) + ``` + + See our [Candidate Sampling Algorithms Reference] + (https://www.tensorflow.org/extras/candidate_sampling.pdf) + + Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007) + ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math. 
+ + Args: + weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor` + objects whose concatenation along dimension 0 has shape + [num_classes, dim]. The (possibly-sharded) class embeddings. + biases: A `Tensor` of shape `[num_classes]`. The class biases. + labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`. + The index of the single target class for each row of logits. Note that + this format differs from the `labels` argument of + `nn.sparse_softmax_cross_entropy_with_logits`. + inputs: A `Tensor` of shape `[batch_size, dim]`. The forward + activations of the input network. + num_sampled: An `int`. The number of classes to randomly sample per batch. + num_classes: An `int`. The number of possible classes. + sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`, + `sampled_expected_count`) returned by a `*_candidate_sampler` function. + (if None, we default to `log_uniform_candidate_sampler`) + remove_accidental_hits: A `bool`. whether to remove "accidental hits" + where a sampled class equals one of the target classes. Default is + True. + partition_strategy: A string specifying the partitioning strategy, relevant + if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. + Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. + name: A name for the operation (optional). + + Returns: + A `batch_size` 1-D tensor of per-example sampled softmax losses. + + """ + logits, labels = nn_impl._compute_sampled_logits( + weights=weights, + biases=biases, + labels=labels, + inputs=inputs, + num_sampled=num_sampled, + num_classes=num_classes, + num_true=1, + sampled_values=sampled_values, + subtract_log_q=True, + remove_accidental_hits=remove_accidental_hits, + partition_strategy=partition_strategy, + labels_as_indices=True, + name=name) + + sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits( + labels=array_ops.squeeze(labels), logits=logits) + # sampled_losses is a [batch_size] tensor. 
+ return sampled_losses diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index f7edace5b1..a80662c8b5 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -90,7 +90,6 @@ See the @{$python/nn} guide. @@in_top_k @@nce_loss @@sampled_softmax_loss -@@sampled_sparse_softmax_loss @@uniform_candidate_sampler @@log_uniform_candidate_sampler @@learned_unigram_candidate_sampler diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index b2b57a055f..ad18eedfb0 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import candidate_sampling_ops from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops @@ -893,6 +894,7 @@ def _compute_sampled_logits(weights, subtract_log_q=True, remove_accidental_hits=False, partition_strategy="mod", + labels_as_indices=False, name=None): """Helper function for nce_loss and sampled_softmax_loss functions. @@ -930,12 +932,18 @@ def _compute_sampled_logits(weights, partition_strategy: A string specifying the partitioning strategy, relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. + labels_as_indices: A `bool`. Whether the returned labels represent the + indices of the true classes. Default is `False`. name: A name for the operation (optional). Returns: - out_logits, out_labels: `Tensor` objects each with shape + out_logits: `Tensor` object with shape `[batch_size, num_true + num_sampled]`, for passing to either `nn.sigmoid_cross_entropy_with_logits` (NCE) or `nn.softmax_cross_entropy_with_logits` (sampled softmax). 
+ out_labels: If `labels_as_indices` is `False`, a Tensor object with the same + shape as `out_logits`. Otherwise a `Tensor` of shape + `[batch_size, num_true]` with the indices of the target classes for each + row of `out_logits`. """ if isinstance(weights, variables.PartitionedVariable): @@ -1046,13 +1054,19 @@ def _compute_sampled_logits(weights, # Construct output logits and labels. The true labels/logits start at col 0. out_logits = array_ops.concat([true_logits, sampled_logits], 1) - # true_logits is a float tensor, ones_like(true_logits) is a float tensor - # of ones. We then divide by num_true to ensure the per-example labels sum - # to 1.0, i.e. form a proper probability distribution. - out_labels = array_ops.concat([ - array_ops.ones_like(true_logits) / num_true, - array_ops.zeros_like(sampled_logits) - ], 1) + if labels_as_indices: + # We want each row of labels to be the indices of the targets, which + # start at col 0 and end at col num_true-1. + out_labels = gen_array_ops.tile( + [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1]) + else: + # true_logits is a float tensor, ones_like(true_logits) is a float + # tensor of ones. We then divide by num_true to ensure the per-example + # labels sum to 1.0, i.e. form a proper probability distribution. + out_labels = array_ops.concat([ + array_ops.ones_like(true_logits) / num_true, + array_ops.zeros_like(sampled_logits) + ], 1) return out_logits, out_labels @@ -1258,101 +1272,3 @@ def sampled_softmax_loss(weights, labels=labels, logits=logits) # sampled_losses is a [batch_size] tensor. return sampled_losses - - -def sampled_sparse_softmax_loss(weights, - biases, - labels, - inputs, - num_sampled, - num_classes, - sampled_values=None, - remove_accidental_hits=True, - partition_strategy="mod", - name="sampled_sparse_softmax_loss"): - """Computes and returns the sampled sparse softmax training loss. - - This is a faster way to train a softmax classifier over a huge number of - classes. 
- - This operation is for training only. It is generally an underestimate of - the full softmax loss. - - A common use case is to use this method for training, and calculate the full - softmax loss for evaluation or inference. In this case, you must set - `partition_strategy="div"` for the two losses to be consistent, as in the - following example: - - ```python - if mode == "train": - loss = tf.nn.sampled_sparse_softmax_loss( - weights=weights, - biases=biases, - labels=labels, - inputs=inputs, - ..., - partition_strategy="div") - elif mode == "eval": - logits = tf.matmul(inputs, tf.transpose(weights)) - logits = tf.nn.bias_add(logits, biases) - labels_one_hot = tf.one_hot(labels, n_classes) - loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=labels_one_hot, - logits=logits) - ``` - - See our [Candidate Sampling Algorithms Reference] - (https://www.tensorflow.org/extras/candidate_sampling.pdf) - - Also see Section 3 of [Jean et al., 2014](http://arxiv.org/abs/1412.2007) - ([pdf](http://arxiv.org/pdf/1412.2007.pdf)) for the math. - - Args: - weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor` - objects whose concatenation along dimension 0 has shape - [num_classes, dim]. The (possibly-sharded) class embeddings. - biases: A `Tensor` of shape `[num_classes]`. The class biases. - labels: A `Tensor` of type `int64` and shape `[batch_size, 1]`. - The index of the single target class for each row of logits. Note that - this format differs from the `labels` argument of - `nn.sparse_softmax_cross_entropy_with_logits`. - inputs: A `Tensor` of shape `[batch_size, dim]`. The forward - activations of the input network. - num_sampled: An `int`. The number of classes to randomly sample per batch. - num_classes: An `int`. The number of possible classes. - sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`, - `sampled_expected_count`) returned by a `*_candidate_sampler` function. 
- (if None, we default to `log_uniform_candidate_sampler`) - remove_accidental_hits: A `bool`. whether to remove "accidental hits" - where a sampled class equals one of the target classes. Default is - True. - partition_strategy: A string specifying the partitioning strategy, relevant - if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. - Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. - name: A name for the operation (optional). - - Returns: - A `batch_size` 1-D tensor of per-example sampled softmax losses. - - """ - logits, labels = _compute_sampled_logits( - weights=weights, - biases=biases, - labels=labels, - inputs=inputs, - num_sampled=num_sampled, - num_classes=num_classes, - num_true=1, - sampled_values=sampled_values, - subtract_log_q=True, - remove_accidental_hits=remove_accidental_hits, - partition_strategy=partition_strategy, - name=name) - - # labels returned by _compute_sampled_logits are one_hot. Convert to indices. - labels = array_ops.reshape(math_ops.argmax(labels, axis=1), [-1]) - - sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - # sampled_losses is a [batch_size] tensor. - return sampled_losses -- GitLab From 7680d8d00dec8897b64ea864da71537b7be957de Mon Sep 17 00:00:00 2001 From: TTrapper Date: Fri, 6 Oct 2017 00:47:54 -0300 Subject: [PATCH 0005/1225] checkstyle fix --- tensorflow/python/ops/nn_impl.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index ad18eedfb0..8e64259143 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -1055,18 +1055,18 @@ def _compute_sampled_logits(weights, # Construct output logits and labels. The true labels/logits start at col 0. 
out_logits = array_ops.concat([true_logits, sampled_logits], 1) if labels_as_indices: - # We want each row of labels to be the indices of the targets, which - # start at col 0 and end at col num_true-1. - out_labels = gen_array_ops.tile( - [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1]) + # We want each row of labels to be the indices of the targets, which + # start at col 0 and end at col num_true-1. + out_labels = gen_array_ops.tile( + [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1]) else: - # true_logits is a float tensor, ones_like(true_logits) is a float - # tensor of ones. We then divide by num_true to ensure the per-example - # labels sum to 1.0, i.e. form a proper probability distribution. - out_labels = array_ops.concat([ - array_ops.ones_like(true_logits) / num_true, - array_ops.zeros_like(sampled_logits) - ], 1) + # true_logits is a float tensor, ones_like(true_logits) is a float + # tensor of ones. We then divide by num_true to ensure the per-example + # labels sum to 1.0, i.e. form a proper probability distribution. + out_labels = array_ops.concat([ + array_ops.ones_like(true_logits) / num_true, + array_ops.zeros_like(sampled_logits) + ], 1) return out_logits, out_labels -- GitLab From f300bcbb3419e7ad7130a84d5375ae53d92e1568 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Sun, 22 Oct 2017 21:36:25 +0800 Subject: [PATCH 0006/1225] Propagate -DPCRE_STATIC from pcre.BUILD to swig.BUILD To fix a build error on Windows: ERROR: C:/os/t/external/swig/BUILD.bazel:5:1: Linking of rule '@swig//:swig' failed (Exit 1120): link.exe failed: error executing command misc.o : error LNK2019: unresolved external symbol __imp_pcre_compile referenced in function Swig_string_regex ... 
--- third_party/pcre.BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/pcre.BUILD b/third_party/pcre.BUILD index 68aadd1d40..e2cdec4029 100644 --- a/third_party/pcre.BUILD +++ b/third_party/pcre.BUILD @@ -50,12 +50,12 @@ cc_library( "-DNEWLINE=10", "-DNO_RECURSE", "-DPARENS_NEST_LIMIT=50", - "-DPCRE_STATIC=1", "-DPOSIX_MALLOC_THRESHOLD=10", "-DSTDC_HEADERS=1", "-DSUPPORT_UCP", "-DSUPPORT_UTF", ], + defines = ["PCRE_STATIC=1"], includes = ["."], visibility = ["@swig//:__pkg__"], # Please use RE2 alwayslink = 1, -- GitLab From 40fc0cb0258352b5d00f25bab55a6991b06b959b Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 5 Nov 2017 14:52:42 +0000 Subject: [PATCH 0007/1225] Fix issue in the `Defun` docs This fix fixes a couple of typos in the `Defun` docs: `tf.Constant` -> `tf.constant` Signed-off-by: Yong Tang --- tensorflow/python/framework/function.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index cef3f8d4c4..f55ee5b1e1 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -82,8 +82,8 @@ class Defun(object): return x + y, x - y # Building the graph. 
- a = tf.Constant([1.0]) - b = tf.Constant([2.0]) + a = tf.constant([1.0]) + b = tf.constant([2.0]) c, d = MyFunc(a, b, name='mycall') ``` """ -- GitLab From 1a94310a14d073fbc80d55b211a85e47a2f9c9c6 Mon Sep 17 00:00:00 2001 From: dariavel Date: Thu, 26 Oct 2017 17:06:00 +0300 Subject: [PATCH 0008/1225] Add connectivity check Ping on each channel and count send+recv completions Signed-off-by: dariavel --- tensorflow/contrib/verbs/rdma.cc | 18 ++-- tensorflow/contrib/verbs/rdma.h | 1 + tensorflow/contrib/verbs/rdma_mgr.cc | 93 ++++++++++++++++++++ tensorflow/contrib/verbs/rdma_mgr.h | 7 +- tensorflow/contrib/verbs/verbs_server_lib.cc | 5 +- 5 files changed, 111 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 331943a3ef..d99cb34661 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -147,7 +147,7 @@ ibv_device* set_device() { // check validity of input device CHECK(false) << "The device " << env_p_rdma_device << " wasn't found"; } else { - // set default device + // set default device str_port_num = get_env_var("RDMA_DEVICE_PORT"); CHECK(str_port_num.empty()) << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user"; @@ -177,7 +177,7 @@ ibv_device* set_device() { // Returns: // port to use uint8_t set_port(ibv_context* context) { - uint8_t port_num = 0; //0 is illegal port number + uint8_t port_num = 0; // 0 is illegal port number string str_port_num; ibv_device_attr device_att; ibv_port_attr port_attr; @@ -419,9 +419,6 @@ RdmaAdapter::RdmaAdapter(const WorkerEnv* worker_env) 0); CHECK(cq_) << "Failed to create completion queue"; CHECK(!ibv_req_notify_cq(cq_, 0)) << "Failed to request CQ notification"; - polling_thread_.reset(Env::Default()->StartThread( - ThreadOptions(), "RdmaAdapterCQThread", [this] { Process_CQ(); })); - VLOG(2) << "Start RdmaAdapter: " << name(); } RdmaAdapter::~RdmaAdapter() { @@ -433,6 +430,12 @@ RdmaAdapter::~RdmaAdapter() { 
CHECK(!ibv_close_device(context_)) << "Failed to release context"; } +void RdmaAdapter::StartPolling() { + polling_thread_.reset(Env::Default()->StartThread( + ThreadOptions(), "RdmaAdapterCQThread", [this] { Process_CQ(); })); + VLOG(2) << "Start RdmaAdapter: " << name(); +} + string RdmaAdapter::name() const { return string(context_->device->name); } // Function to process incoming messages @@ -633,11 +636,6 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, buffer_index_name_table_.insert({index, buffer_names[i]}); buffer_name_index_table_.insert({buffer_names[i], index}); } - - // Initiate recv - for (int i = 0; i < 100; i++) { - Recv(); - } } } diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index 52d92a7c5b..2e128961b6 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -107,6 +107,7 @@ class RdmaAdapter { ~RdmaAdapter(); // Adapter name, e.g. mlx5_0. string name() const; + void StartPolling(); void Process_CQ(); protected: diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc index 09b878843f..b3b3c4f31d 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -115,6 +115,99 @@ void RdmaMgr::SetupChannels() { } } +#define PING_RECV_WRID 0 +#define PING_BUFF_SIZE 1024 + +int RdmaMgr::PostRecv(RdmaChannel* rc, struct ibv_sge list) { + struct ibv_recv_wr wr, *bad_wr; + memset(&wr, 0, sizeof(wr)); + wr.sg_list = &list; + wr.num_sge = 1; + wr.wr_id = PING_RECV_WRID; + + return ibv_post_recv(rc->qp_, &wr, &bad_wr); +} + +int RdmaMgr::PostSend(RdmaChannel* rc, struct ibv_sge list) { + struct ibv_send_wr wr, *bad_wr; + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (uint64_t)rc; + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IBV_WR_SEND; + wr.send_flags = IBV_SEND_SIGNALED; + + return ibv_post_send(rc->qp_, &wr, &bad_wr); +} + +// Check connectivity by pinging every channel +bool 
RdmaMgr::ConnectivityCheck() { + int i, rcnt = 0, scnt = 0; + void* buff; + struct ibv_sge list; + buff = malloc(PING_BUFF_SIZE); + CHECK(buff) << "Malloc failed!"; + struct ibv_mr* mr = ibv_reg_mr(rdma_adapter_->pd_, buff, PING_BUFF_SIZE, + IBV_ACCESS_LOCAL_WRITE); + CHECK(mr) << "Failed to register memory region"; + + memset(&list, 0, sizeof(list)); + list.addr = (uintptr_t)buff; + list.length = PING_BUFF_SIZE; + list.lkey = mr->lkey; + + for (const auto& p : channel_table_) { + string worker_name = p.first; + RdmaChannel* rc = p.second; + + VLOG(2) << "Ping to " << worker_name; + CHECK(PostRecv(rc, list) == 0) << "Couldn't post receive from " + << worker_name << " with error " + << std::strerror(errno); + CHECK(PostSend(rc, list) == 0) << "Couldn't post send to " << worker_name + << " with error: " << std::strerror(errno); + for (int i = 0; i < 100; i++) { + rc->Recv(); + } + } + + while (rcnt < num_remote_workers_ || scnt < num_remote_workers_) { + int ne; + do { + ne = ibv_poll_cq(rdma_adapter_->cq_, 2 * num_remote_workers_, + rdma_adapter_->wc_); + CHECK(ne >= 0) << "poll CQ failed " << ne << "with error" + << std::strerror(errno); + } while (ne < 1); + + for (i = 0; i < ne; ++i) { + ibv_wc_status s = rdma_adapter_->wc_[i].status; + // recv complete + if ((int)rdma_adapter_->wc_[i].wr_id == PING_RECV_WRID) { + CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str( + rdma_adapter_->wc_[i].status) + << "(" << rdma_adapter_->wc_[i].status + << ") for PING_RECV_WRID"; + ++rcnt; + // send complete + } else { + RdmaChannel* rc = + reinterpret_cast(rdma_adapter_->wc_[i].wr_id); + CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str( + rdma_adapter_->wc_[i].status) + << "(" << rdma_adapter_->wc_[i].status + << ") to " << rc->remote_name_; + ++scnt; + } + } // for + } // while + CHECK(rcnt == scnt) << "Connectivity check failed!"; + ibv_dereg_mr(mr); + free(buff); + rdma_adapter_->StartPolling(); + return (num_remote_workers_ == rcnt) && (num_remote_workers_ == 
scnt); +} + RdmaMgr::~RdmaMgr() { for (const auto& p : channel_table_) delete p.second; channel_table_.clear(); diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h index b156f64096..4ace70ba57 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.h +++ b/tensorflow/contrib/verbs/rdma_mgr.h @@ -28,12 +28,16 @@ limitations under the License. namespace tensorflow { class RdmaMgr { + friend class RdmaChannel; + friend class RdmaAdapter; + public: explicit RdmaMgr(const WorkerEnv* const worker_env, GrpcChannelCache* const channel_cache); ~RdmaMgr(); RdmaChannel* FindChannel(const string& key); void SetupChannels(); + bool ConnectivityCheck(); const string& local_worker() { return local_worker_; } private: @@ -44,7 +48,8 @@ class RdmaMgr { RdmaAdapter* rdma_adapter_; typedef std::unordered_map ChannelTable; ChannelTable channel_table_; - + int PostSend(RdmaChannel* rc, struct ibv_sge list); + int PostRecv(RdmaChannel* rc, struct ibv_sge list); TF_DISALLOW_COPY_AND_ASSIGN(RdmaMgr); }; diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc index 6d1c79c0fb..a606ef75a4 100644 --- a/tensorflow/contrib/verbs/verbs_server_lib.cc +++ b/tensorflow/contrib/verbs/verbs_server_lib.cc @@ -49,8 +49,8 @@ VerbsServer::~VerbsServer() { Status VerbsServer::ChannelCacheFactory(const ServerDef& server_def, GrpcChannelCache** channel_cache) { string name_prefix = - strings::StrCat("/job:", server_def.job_name(), "/replica:0", - "/task:", server_def.task_index()); + strings::StrCat("/job:", server_def.job_name(), "/replica:0", "/task:", + server_def.task_index()); GrpcChannelSpec channel_spec; TF_RETURN_IF_ERROR(ParseChannelSpec(server_def, &channel_spec)); @@ -103,6 +103,7 @@ Status VerbsServer::Start() { ThreadOptions(), "TF_verbs_service", [this] { verbs_service_->HandleRPCsLoop(); })); rdma_mgr_->SetupChannels(); + CHECK(rdma_mgr_->ConnectivityCheck()) << "Connectivity check failed!"; verbs_state_ = CONNECTED; 
} } -- GitLab From 734237891314132631bdd8adf03b8d7827f9c4ae Mon Sep 17 00:00:00 2001 From: dariavel Date: Tue, 31 Oct 2017 14:11:14 +0200 Subject: [PATCH 0009/1225] Move PostSend and PostRecv from mgr to channel, PostRecv upon channel creation before connectivity check Signed-off-by: dariavel --- tensorflow/contrib/verbs/rdma.cc | 39 ++++++++++++++++++++++++ tensorflow/contrib/verbs/rdma.h | 9 ++++++ tensorflow/contrib/verbs/rdma_mgr.cc | 45 ++-------------------------- tensorflow/contrib/verbs/rdma_mgr.h | 2 -- 4 files changed, 50 insertions(+), 45 deletions(-) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index d99cb34661..55a8f20c29 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -561,9 +561,44 @@ void RdmaAdapter::Process_CQ() { } } +int RdmaChannel::PingPostRecv() { + struct ibv_recv_wr wr, *bad_wr; + memset(&wr, 0, sizeof(wr)); + wr.sg_list = &ping_sge_list_; + wr.num_sge = 1; + wr.wr_id = PingRecvWrid; + + return ibv_post_recv(qp_, &wr, &bad_wr); +} + +int RdmaChannel::PingPostSend() { + struct ibv_send_wr wr, *bad_wr; + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (uint64_t)this; + wr.sg_list = &ping_sge_list_; + wr.num_sge = 1; + wr.opcode = IBV_WR_SEND; + wr.send_flags = IBV_SEND_SIGNALED; + + return ibv_post_send(qp_, &wr, &bad_wr); +} + RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, const string remote_name) : adapter_(adapter), local_name_(local_name), remote_name_(remote_name) { + + struct ibv_sge list; + + mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, PingBuffSize, + IBV_ACCESS_LOCAL_WRITE); + CHECK(mr_) << "Failed to register memory region"; + + memset(&list, 0, sizeof(list)); + list.addr = (uintptr_t)ping_buff_; + list.length = PingBuffSize; + list.lkey = mr_->lkey; + + ping_sge_list_ = list; // Create queue pair { struct ibv_qp_init_attr attr; @@ -637,9 +672,13 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, 
buffer_name_index_table_.insert({buffer_names[i], index}); } } + CHECK(PingPostRecv() == 0) << "Couldn't post receive from " + << remote_name_ << " with error " + << std::strerror(errno); } RdmaChannel::~RdmaChannel() { + ibv_dereg_mr(mr_); CHECK(!ibv_destroy_qp(qp_)) << "Failed to destroy QP"; delete tx_message_buffer_; delete rx_message_buffer_; diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index 2e128961b6..92391d6a57 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -162,6 +162,15 @@ class RdmaChannel { void RemoveRecvCallback(const string& key); void RunRecvCallback(const string& key); static const int kNumMessageBuffers = 4; + static const int PingRecvWrid = 0; + + private: + static const int PingBuffSize = 1024; + char ping_buff_[PingBuffSize]; + struct ibv_mr* mr_; + struct ibv_sge ping_sge_list_; + int PingPostRecv(); + int PingPostSend(); protected: const RdmaAdapter* adapter_; diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc index b3b3c4f31d..3e2171f33d 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -115,56 +115,17 @@ void RdmaMgr::SetupChannels() { } } -#define PING_RECV_WRID 0 -#define PING_BUFF_SIZE 1024 - -int RdmaMgr::PostRecv(RdmaChannel* rc, struct ibv_sge list) { - struct ibv_recv_wr wr, *bad_wr; - memset(&wr, 0, sizeof(wr)); - wr.sg_list = &list; - wr.num_sge = 1; - wr.wr_id = PING_RECV_WRID; - - return ibv_post_recv(rc->qp_, &wr, &bad_wr); -} - -int RdmaMgr::PostSend(RdmaChannel* rc, struct ibv_sge list) { - struct ibv_send_wr wr, *bad_wr; - memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)rc; - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IBV_WR_SEND; - wr.send_flags = IBV_SEND_SIGNALED; - - return ibv_post_send(rc->qp_, &wr, &bad_wr); -} // Check connectivity by pinging every channel bool RdmaMgr::ConnectivityCheck() { int i, rcnt = 0, scnt = 0; - void* buff; - struct ibv_sge 
list; - buff = malloc(PING_BUFF_SIZE); - CHECK(buff) << "Malloc failed!"; - struct ibv_mr* mr = ibv_reg_mr(rdma_adapter_->pd_, buff, PING_BUFF_SIZE, - IBV_ACCESS_LOCAL_WRITE); - CHECK(mr) << "Failed to register memory region"; - - memset(&list, 0, sizeof(list)); - list.addr = (uintptr_t)buff; - list.length = PING_BUFF_SIZE; - list.lkey = mr->lkey; for (const auto& p : channel_table_) { string worker_name = p.first; RdmaChannel* rc = p.second; VLOG(2) << "Ping to " << worker_name; - CHECK(PostRecv(rc, list) == 0) << "Couldn't post receive from " - << worker_name << " with error " - << std::strerror(errno); - CHECK(PostSend(rc, list) == 0) << "Couldn't post send to " << worker_name + CHECK(rc->PingPostSend() == 0) << "Couldn't post send to " << worker_name << " with error: " << std::strerror(errno); for (int i = 0; i < 100; i++) { rc->Recv(); @@ -183,7 +144,7 @@ bool RdmaMgr::ConnectivityCheck() { for (i = 0; i < ne; ++i) { ibv_wc_status s = rdma_adapter_->wc_[i].status; // recv complete - if ((int)rdma_adapter_->wc_[i].wr_id == PING_RECV_WRID) { + if ((int)rdma_adapter_->wc_[i].wr_id == RdmaChannel::PingRecvWrid) { CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str( rdma_adapter_->wc_[i].status) << "(" << rdma_adapter_->wc_[i].status @@ -202,8 +163,6 @@ bool RdmaMgr::ConnectivityCheck() { } // for } // while CHECK(rcnt == scnt) << "Connectivity check failed!"; - ibv_dereg_mr(mr); - free(buff); rdma_adapter_->StartPolling(); return (num_remote_workers_ == rcnt) && (num_remote_workers_ == scnt); } diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h index 4ace70ba57..e711e60478 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.h +++ b/tensorflow/contrib/verbs/rdma_mgr.h @@ -48,8 +48,6 @@ class RdmaMgr { RdmaAdapter* rdma_adapter_; typedef std::unordered_map<string, RdmaChannel*> ChannelTable; ChannelTable channel_table_; - int PostSend(RdmaChannel* rc, struct ibv_sge list); - int PostRecv(RdmaChannel* rc, struct ibv_sge list);
TF_DISALLOW_COPY_AND_ASSIGN(RdmaMgr); }; -- GitLab From 097d536c02d5e9f8ab0c2269161343471c2a00fe Mon Sep 17 00:00:00 2001 From: dariavel Date: Mon, 9 Oct 2017 15:54:32 +0300 Subject: [PATCH 0010/1225] Call done in case of not OK status fix + light code refactoring Signed-off-by: dariavel --- .../contrib/verbs/rdma_rendezvous_mgr.cc | 40 +++++++------------ 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc index ce82ca2883..2bfa81c2ae 100644 --- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc @@ -58,20 +58,13 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( // parse src_name and dst_name string src_name, dst_name, unused; if (!DeviceNameUtils::SplitDeviceName(parsed.src_device, &src_name, + &unused) || + !DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name, &unused)) { s = errors::Internal("Could not parse src name."); } - CHECK(s.ok()) << "s is not ok, error code " << s.error_message(); - if (!s.ok()) { - done(s, Args(), recv_args, Tensor{}, false); - return; - } - if (!DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name, - &unused)) { - s = errors::Internal("Could not parse dst name."); - } - CHECK(s.ok()) << "s is not ok, error code " << s.error_message(); if (!s.ok()) { + LOG(ERROR) << "s is not ok, error code " << s.error_message(); done(s, Args(), recv_args, Tensor{}, false); return; } @@ -82,18 +75,13 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( // insert callback rc->InsertRecvCallback(key_with_step_id, [this, key, key_with_step_id, rc, recv_args, parsed, done]() { - Status s; - Device* src_dev; - s = env_->device_mgr->LookupDevice("CPU:0", &src_dev); - CHECK(s.ok()) << "s is not ok, error code " << s.error_message(); - if (!s.ok()) { - done(s, Args(), recv_args, Tensor(), true); - return; - } - Device* dst_dev; - s = env_->device_mgr->LookupDevice(parsed.dst_device, 
&dst_dev); - CHECK(s.ok()) << "s is not ok, error code " << s.error_message(); - if (!s.ok()) { + Status src_s, dst_s, s; + Device* src_dev, *dst_dev; + src_s = env_->device_mgr->LookupDevice("CPU:0", &src_dev); + dst_s = env_->device_mgr->LookupDevice(parsed.dst_device, &dst_dev); + if (!src_s.ok() || !dst_s.ok()) { + s = src_s.ok() ? dst_s : src_s; + LOG(ERROR) << "s is not ok, error code " << s.error_message(); done(s, Args(), recv_args, Tensor(), true); return; } @@ -111,8 +99,8 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( if (dst_dev->tensorflow_gpu_device_info() && (!recv_args.alloc_attrs.on_host())) { CHECK(recv_args.device_context) - << "send dev name: " << src_dev->name() - << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); + << "send dev name: " << src_dev->name() + << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); Allocator* alloc = ProcessState::singleton()->GetCUDAHostAllocator(0); Tensor copy(alloc, rm.data_type_, rm.tensor_shape_); memcpy(DMAHelper::base(&copy), input, rm.tensor_bytes_); @@ -122,8 +110,8 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( GPUUtil::CopyCPUTensorToGPU( &copy, recv_args.device_context, dst_dev, &gpu_copy, - [this, gpu_copy, key, key_with_step_id, recv_args, done, rm, - rc](const Status& s) { + [this, gpu_copy, key, key_with_step_id, recv_args, done, rm, rc]( + const Status& s) { CHECK(s.ok()) << "copy tensor to gpu sync"; Tensor val; val = std::move(gpu_copy); -- GitLab From 7edaa93308d7b4d03dd32c009c1ffe5847b9a8b8 Mon Sep 17 00:00:00 2001 From: Noa Ezra Date: Wed, 18 Oct 2017 10:25:04 +0300 Subject: [PATCH 0011/1225] fix compilation error when working without cuda Signed-off-by: Noa Ezra adding cuda library to BUILD file in order to use GOOGLE_CUDA define Signed-off-by: Noa Ezra --- tensorflow/contrib/verbs/BUILD | 6 ++++-- tensorflow/contrib/verbs/rdma.cc | 5 +++++ tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git
a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD index 746ff38b37..8b38fc1e85 100644 --- a/tensorflow/contrib/verbs/BUILD +++ b/tensorflow/contrib/verbs/BUILD @@ -7,6 +7,8 @@ package(default_visibility = [ licenses(["notice"]) # Apache 2.0 +load("//tensorflow:tensorflow.bzl","tf_cuda_library") + exports_files(["LICENSE"]) filegroup( @@ -97,7 +99,7 @@ cc_library( alwayslink = 1, ) -cc_library( +tf_cuda_library( name = "rdma_rendezvous_mgr", srcs = ["rdma_rendezvous_mgr.cc"], hdrs = ["rdma_rendezvous_mgr.h"], @@ -130,7 +132,7 @@ cc_library( ], ) -cc_library( +tf_cuda_library( name = "rdma", srcs = ["rdma.cc"], hdrs = ["rdma.h"], diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 55a8f20c29..79c6c1ab07 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -21,8 +21,10 @@ limitations under the License. #include "tensorflow/contrib/verbs/verbs_util.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" +#if GOOGLE_CUDA #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/gpu/process_state.h" +#endif #include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h" #include "tensorflow/core/distributed_runtime/session_mgr.h" #include "tensorflow/core/framework/rendezvous.h" @@ -31,6 +33,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/core/threadpool.h" namespace tensorflow { @@ -1063,6 +1066,7 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( TensorProto proto; if (src_dev->tensorflow_gpu_device_info() && (!send_args.alloc_attrs.on_host())) { +#if GOOGLE_CUDA CHECK(send_args.device_context) << "send dev name: " << src_dev->name() << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); @@ -1101,6 +1105,7 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( &proto, NULL, send_args, recv_args); }); } +#endif // GOOGLE_CUDA } else { // tensor is in CPU memory. StringPiece copy_buf; diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc index 2bfa81c2ae..dbb3d25f45 100644 --- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc @@ -21,8 +21,10 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" +#if GOOGLE_CUDA #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/gpu/process_state.h" +#endif // GOOGLE_CUDA #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -98,6 +100,7 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( if (can_memcpy) { if (dst_dev->tensorflow_gpu_device_info() && (!recv_args.alloc_attrs.on_host())) { +#if GOOGLE_CUDA CHECK(recv_args.device_context) << "send dev name: " << src_dev->name() << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); @@ -118,6 +121,7 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( RecvPostCopyOps(key, key_with_step_id, recv_args, done, rm, rc, val, s); }); +#endif // GOOGLE_CUDA return; } else { AllocatorAttributes host_alloc_attrs; -- GitLab From d7dce09a100e29f63f2ac20740a061c9aaf27654 Mon Sep 17 00:00:00 2001 From: dariavel Date: Mon, 6 Nov 2017 11:38:15 +0200 Subject: [PATCH 0012/1225] Replace hardcoded 100 with RDMA_QP_QUEUE_DEPTH Signed-off-by: dariavel --- tensorflow/contrib/verbs/rdma_mgr.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc index 3e2171f33d..8d26e022d0 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -127,7 +127,7 @@ bool RdmaMgr::ConnectivityCheck() { VLOG(2) << "Ping to " << worker_name; CHECK(rc->PingPostSend() == 0) << "Couldn't post send to " << worker_name << " with error: " << std::strerror(errno); - for (int i = 0; i < 100; i++) { + for (i = 0; i < rc->adapter_->params_.queue_depth - 1; i++) { rc->Recv(); } } -- GitLab From d6b267ac78fcb6a3250c24d466e8aa478c1fc783 Mon Sep 17 00:00:00 2001 From: dariavel Date: Tue, 7 Nov 2017 11:57:50 +0200 
Subject: [PATCH 0013/1225] Clang formating Signed-off-by: dariavel --- tensorflow/contrib/verbs/BUILD | 2 +- tensorflow/contrib/verbs/rdma.cc | 9 ++++----- tensorflow/contrib/verbs/rdma_mgr.cc | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/verbs/BUILD b/tensorflow/contrib/verbs/BUILD index 8b38fc1e85..38a84ffb10 100644 --- a/tensorflow/contrib/verbs/BUILD +++ b/tensorflow/contrib/verbs/BUILD @@ -7,7 +7,7 @@ package(default_visibility = [ licenses(["notice"]) # Apache 2.0 -load("//tensorflow:tensorflow.bzl","tf_cuda_library") +load("//tensorflow:tensorflow.bzl", "tf_cuda_library") exports_files(["LICENSE"]) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 79c6c1ab07..1fa98a1f01 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -577,7 +577,7 @@ int RdmaChannel::PingPostRecv() { int RdmaChannel::PingPostSend() { struct ibv_send_wr wr, *bad_wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t)this; + wr.wr_id = (uint64_t) this; wr.sg_list = &ping_sge_list_; wr.num_sge = 1; wr.opcode = IBV_WR_SEND; @@ -593,7 +593,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, struct ibv_sge list; mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, PingBuffSize, - IBV_ACCESS_LOCAL_WRITE); + IBV_ACCESS_LOCAL_WRITE); CHECK(mr_) << "Failed to register memory region"; memset(&list, 0, sizeof(list)); @@ -675,9 +675,8 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, buffer_name_index_table_.insert({buffer_names[i], index}); } } - CHECK(PingPostRecv() == 0) << "Couldn't post receive from " - << remote_name_ << " with error " - << std::strerror(errno); + CHECK(PingPostRecv() == 0) << "Couldn't post receive from " << remote_name_ + << " with error " << std::strerror(errno); } RdmaChannel::~RdmaChannel() { diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc index 8d26e022d0..e7df0528b5 
100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -115,7 +115,6 @@ void RdmaMgr::SetupChannels() { } } - // Check connectivity by pinging every channel bool RdmaMgr::ConnectivityCheck() { int i, rcnt = 0, scnt = 0; -- GitLab From f552fb90e94ccfb72475327553c968412282eb26 Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Tue, 7 Nov 2017 22:04:16 -0500 Subject: [PATCH 0014/1225] update create_train_op to use get_global_step --- tensorflow/contrib/training/python/training/training.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/training/python/training/training.py b/tensorflow/contrib/training/python/training/training.py index 6a4d79796d..59f02fa38f 100644 --- a/tensorflow/contrib/training/python/training/training.py +++ b/tensorflow/contrib/training/python/training/training.py @@ -255,6 +255,7 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import summary from tensorflow.python.training import monitored_session from tensorflow.python.training import optimizer as tf_optimizer +from tensorflow.python.training import training_util # TODO(nsilberman): move add_gradients_summaries, clip_gradient_norms and # multiply_gradients into contrib/summaries and contrib/optimizers.py @@ -409,7 +410,7 @@ def create_train_op(total_loss, loss value. """ if global_step is _USE_GLOBAL_STEP: - global_step = variables.get_or_create_global_step() + global_step = training_util.get_global_step() # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None. 
global_update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) -- GitLab From 56e0d5e0d8dab578f1c9ef723772ac79e9fc9583 Mon Sep 17 00:00:00 2001 From: Jay Young Date: Wed, 8 Nov 2017 16:22:59 +0800 Subject: [PATCH 0015/1225] [FIX]the estimator generate by tf.keras.model_to_estimator() cannot export saved_model because the model_fn provided by _create_keras_model_fn wasn't set export_outputs in the returned EstimatorSpec. Here I provide a default export_outputs with serve_default key and Predict API, and the result inside is same as predictions [FIX]_save_first_checkpoint call saver.save with only a path and without filename, that make the ckpt saved with name like `{model_dir}/.meta` and `{model_dir}/.index`, which can not be found by latest_checkpoint("{model_dir}"). As state by save method of Saver, save_path should be a path to the checkpoint name. So to fix this, I change the name to `{model_dir}/keras_model.ckpt` --- tensorflow/python/keras/_impl/keras/estimator.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py index 125e63e1b8..a2a2fe0ead 100644 --- a/tensorflow/python/keras/_impl/keras/estimator.py +++ b/tensorflow/python/keras/_impl/keras/estimator.py @@ -19,10 +19,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os from tensorflow.python.client import session from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.estimator import export as export_lib from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib @@ -33,6 +35,9 @@ from tensorflow.python.ops import metrics as metrics_module from tensorflow.python.platform import tf_logging as 
logging from tensorflow.python.training import saver as saver_lib from tensorflow.python.training import training_util +from tensorflow.python.saved_model import signature_constants + +_DEFAULT_SERVING_KEY = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY def _create_ordered_io(keras_model, estimator_io_dict, is_input=True): @@ -184,7 +189,10 @@ def _create_keras_model_fn(keras_model, custom_objects=None): predictions=predictions, loss=loss, train_op=train_op, - eval_metric_ops=eval_metric_ops) + eval_metric_ops=eval_metric_ops, + export_outputs={ + _DEFAULT_SERVING_KEY: export_lib.export_output.PredictOutput(predictions) + }) return model_fn @@ -222,7 +230,7 @@ def _save_first_checkpoint(keras_model, estimator, custom_objects, K._initialize_variables(sess) # pylint: enable=protected-access saver = saver_lib.Saver() - saver.save(sess, estimator.model_dir + '/') + saver.save(sess, os.path.join(estimator.model_dir, 'keras_model.ckpt')) def model_to_estimator(keras_model=None, -- GitLab From 5de6f68848b8bc431e18a53fa03700820bcee57f Mon Sep 17 00:00:00 2001 From: Cameron Thomas Date: Thu, 9 Nov 2017 01:19:51 +0000 Subject: [PATCH 0016/1225] Forward declare condition_variable Necessary to enable friendship with mutex --- tensorflow/core/platform/default/mutex.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/platform/default/mutex.h b/tensorflow/core/platform/default/mutex.h index c3e44c42d9..044c754e80 100644 --- a/tensorflow/core/platform/default/mutex.h +++ b/tensorflow/core/platform/default/mutex.h @@ -31,6 +31,8 @@ namespace tensorflow { enum LinkerInitialized { LINKER_INITIALIZED }; +class condition_variable; + // Mimic std::mutex + C++17's shared_mutex, adding a LinkerInitialized // constructor interface. This type is as fast as mutex, but is also a shared // lock. 
-- GitLab From b58ee215e631b9c2a0400cbd5b52ea7a3a8bfca0 Mon Sep 17 00:00:00 2001 From: PW486 Date: Thu, 9 Nov 2017 19:12:41 +0900 Subject: [PATCH 0017/1225] Fixed typos, comments --- tensorflow/core/public/session.h | 2 +- tensorflow/core/util/saved_tensor_slice.proto | 2 +- tensorflow/core/util/strided_slice_op.cc | 4 ++-- tensorflow/core/util/tensor_slice_reader.h | 1 - tensorflow/core/util/tensor_slice_reader_cache.h | 1 - tensorflow/core/util/tensor_slice_writer.h | 1 - 6 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/public/session.h b/tensorflow/core/public/session.h index bca384e59f..75ad50f6f2 100644 --- a/tensorflow/core/public/session.h +++ b/tensorflow/core/public/session.h @@ -186,7 +186,7 @@ class Session { /// the `SessionOptions::target` field). virtual Status Close() = 0; - // NOTE(ashankar): As of July 2017, this method was added to faciliate some + // NOTE(ashankar): As of July 2017, this method was added to facilitate some // experimentation. Reconsider/re-evaluate after September 2017. // // Sets `*output` to the `DeviceMgr` that owns accessible devices in the diff --git a/tensorflow/core/util/saved_tensor_slice.proto b/tensorflow/core/util/saved_tensor_slice.proto index 6278685957..8a6dd7bdb7 100644 --- a/tensorflow/core/util/saved_tensor_slice.proto +++ b/tensorflow/core/util/saved_tensor_slice.proto @@ -1,7 +1,7 @@ // Protocol buffers for saved tensor slices. It's used for the brain tensor // ops checkpoints and the V3 checkpoints in dist_belief. -// A checkpoint file is an sstable. The value for each record is a serialized +// A checkpoint file is a stable. The value for each record is a serialized // SavedTensorSlices message (defined below). 
// // Each checkpoint file has a record with the empty key (""), which corresponds diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc index cfe9275a09..d5bc676a9a 100644 --- a/tensorflow/core/util/strided_slice_op.cc +++ b/tensorflow/core/util/strided_slice_op.cc @@ -218,8 +218,8 @@ Status ValidateStridedSliceOp( // Step 2: Make a sparse spec into a full index spec // - // The sparse spec does not corresopnds to the number of dimensions - // Make a dense spec that corresponds to thte number of dimensions + // The sparse spec does not corresponds to the number of dimensions + // Make a dense spec that corresponds to the number of dimensions // // For example suppose foo[...,3:] on foo.shape=(2,2,3) then // we need to produce the missing begin_mask for the first two diff --git a/tensorflow/core/util/tensor_slice_reader.h b/tensorflow/core/util/tensor_slice_reader.h index 4bb2b24615..263f56c7fc 100644 --- a/tensorflow/core/util/tensor_slice_reader.h +++ b/tensorflow/core/util/tensor_slice_reader.h @@ -15,7 +15,6 @@ limitations under the License. // The utility to read checkpoints for google brain tensor ops and v3 // checkpoints for dist_belief. -// #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_READER_H_ #define TENSORFLOW_UTIL_TENSOR_SLICE_READER_H_ diff --git a/tensorflow/core/util/tensor_slice_reader_cache.h b/tensorflow/core/util/tensor_slice_reader_cache.h index bdd36a2791..63a8d0b068 100644 --- a/tensorflow/core/util/tensor_slice_reader_cache.h +++ b/tensorflow/core/util/tensor_slice_reader_cache.h @@ -15,7 +15,6 @@ limitations under the License. // The utility to read checkpoints for google brain tensor ops and v3 // checkpoints for dist_belief. 
-// #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_READER_CACHE_H_ #define TENSORFLOW_UTIL_TENSOR_SLICE_READER_CACHE_H_ diff --git a/tensorflow/core/util/tensor_slice_writer.h b/tensorflow/core/util/tensor_slice_writer.h index 95d6384afe..bdb4921e1b 100644 --- a/tensorflow/core/util/tensor_slice_writer.h +++ b/tensorflow/core/util/tensor_slice_writer.h @@ -15,7 +15,6 @@ limitations under the License. // The utility to write checkpoints for google brain tensor ops and v3 // checkpoints for dist_belief. -// #ifndef TENSORFLOW_UTIL_TENSOR_SLICE_WRITER_H_ #define TENSORFLOW_UTIL_TENSOR_SLICE_WRITER_H_ -- GitLab From c25cd200ddb2728aec1302f655ff220b08d60007 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Thu, 9 Nov 2017 19:23:07 +0900 Subject: [PATCH 0018/1225] CMake: configure default string values of options properly Because cmake configures defaults values as ON or OFF only, string values as default doesn't work. Thus, when it is set "OFF", we need to re-set the values. Fixes #14400 Signed-off-by: MyungJoo Ham --- tensorflow/contrib/cmake/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 77a3fc0c83..846daf3213 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -53,7 +53,15 @@ if (NOT WIN32) set(tensorflow_CUDNN_INCLUDE /usr/include) endif (NOT tensorflow_CUDNN_INCLUDE) option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB}) + if (NOT tensorflow_PATH_CUDNN_STATIC_LIB) + # option's default value is OFF. Fill it with real default values + set (tensorflow_PATH_CUDNN_STATIC_LIB ${tensorflow_PATH_STATIC_LIB}) + endif (NOT tensorflow_PATH_CUDNN_STATIC_LIB) option(tensorflow_PATH_NCCL_STATIC_LIB "Override PATH_STATIC_LIB for libnccl_static.a" ${tensorflow_PATH_STATIC_LIB}) + if (NOT tensorflow_PATH_NCCL_STATIC_LIB) + # option's default value is OFF. 
Fill it with real default values + set (tensorflow_PATH_NCCL_STATIC_LIB ${tensorflow_PATH_STATIC_LIB}) + endif (NOT tensorflow_PATH_NCCL_STATIC_LIB) option(tensorflow_CUDA_LIBRARY_PATH "Designate the default CUDA library paths" /usr/local/cuda/lib64) if (NOT tensorflow_CUDA_LIBRARY_PATH) # option's default value is OFF. Fill it with real default values -- GitLab From d45f27d4586ef2d2dcc405eaac97b1515dad9671 Mon Sep 17 00:00:00 2001 From: PW486 Date: Thu, 9 Nov 2017 23:05:38 +0900 Subject: [PATCH 0019/1225] Fixed typos, comments --- tensorflow/core/util/saved_tensor_slice.proto | 2 +- tensorflow/core/util/strided_slice_op.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/saved_tensor_slice.proto b/tensorflow/core/util/saved_tensor_slice.proto index 8a6dd7bdb7..6278685957 100644 --- a/tensorflow/core/util/saved_tensor_slice.proto +++ b/tensorflow/core/util/saved_tensor_slice.proto @@ -1,7 +1,7 @@ // Protocol buffers for saved tensor slices. It's used for the brain tensor // ops checkpoints and the V3 checkpoints in dist_belief. -// A checkpoint file is a stable. The value for each record is a serialized +// A checkpoint file is an sstable. The value for each record is a serialized // SavedTensorSlices message (defined below). 
// // Each checkpoint file has a record with the empty key (""), which corresponds diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc index d5bc676a9a..f0264c0a9d 100644 --- a/tensorflow/core/util/strided_slice_op.cc +++ b/tensorflow/core/util/strided_slice_op.cc @@ -218,8 +218,8 @@ Status ValidateStridedSliceOp( // Step 2: Make a sparse spec into a full index spec // - // The sparse spec does not corresponds to the number of dimensions - // Make a dense spec that corresponds to the number of dimensions + // The sparse spec does not correspond to the number of dimensions + // Make a dense spec that correspond to the number of dimensions // // For example suppose foo[...,3:] on foo.shape=(2,2,3) then // we need to produce the missing begin_mask for the first two -- GitLab From 6f6eb52a89ec6e360d8604fa68516cf2d819207f Mon Sep 17 00:00:00 2001 From: PW486 Date: Thu, 9 Nov 2017 23:10:41 +0900 Subject: [PATCH 0020/1225] Fixed typos, comments --- tensorflow/core/util/strided_slice_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc index f0264c0a9d..aca60b942d 100644 --- a/tensorflow/core/util/strided_slice_op.cc +++ b/tensorflow/core/util/strided_slice_op.cc @@ -219,7 +219,7 @@ Status ValidateStridedSliceOp( // Step 2: Make a sparse spec into a full index spec // // The sparse spec does not correspond to the number of dimensions - // Make a dense spec that correspond to the number of dimensions + // Make a dense spec that corresponds to the number of dimensions // // For example suppose foo[...,3:] on foo.shape=(2,2,3) then // we need to produce the missing begin_mask for the first two -- GitLab From 17626168cb05e9edc6cbbd57d04c1da8a43ecfb2 Mon Sep 17 00:00:00 2001 From: PW486 Date: Fri, 10 Nov 2017 12:33:44 +0900 Subject: [PATCH 0021/1225] Fixed typos, comments --- tensorflow/contrib/batching/shared_batch_scheduler.h | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/batching/shared_batch_scheduler.h b/tensorflow/contrib/batching/shared_batch_scheduler.h index 41a3f99137..1853827dc0 100644 --- a/tensorflow/contrib/batching/shared_batch_scheduler.h +++ b/tensorflow/contrib/batching/shared_batch_scheduler.h @@ -63,7 +63,7 @@ namespace serving { // instead of N independent ones, with their sharing deliberately coordinated. // // SharedBatchScheduler does not implement the BatchScheduler API; rather, it -// presents an abstraction of "queues", where each queue coresponds to one type +// presents an abstraction of "queues", where each queue corresponds to one type // of task. Tasks submitted to a given queue are placed in their own batches, // and cannot be mixed with other tasks. Queues can be added and deleted // dynamically, to accommodate e.g. versions of a model being brought up and -- GitLab From e058a030f88f19a60e3a4d5ed6b5cbcf85b1a5d6 Mon Sep 17 00:00:00 2001 From: PW486 Date: Fri, 10 Nov 2017 14:19:31 +0900 Subject: [PATCH 0022/1225] Fixed typos --- tensorflow/c/c_test_util.h | 2 +- tensorflow/compiler/xla/client/computation_builder.h | 2 +- tensorflow/contrib/boosted_trees/lib/utils/batch_features.h | 2 +- tensorflow/core/grappler/costs/virtual_placer.h | 2 +- tensorflow/python/util/util.cc | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h index d547337492..bc44a7b840 100644 --- a/tensorflow/c/c_test_util.h +++ b/tensorflow/c/c_test_util.h @@ -74,7 +74,7 @@ TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s, TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s); -// Split `input` along the first dimention into 3 tensors +// Split `input` along the first dimension into 3 tensors TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s, const char* name = "split3"); diff --git 
a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 8e1b4be1f3..9159b26614 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -806,7 +806,7 @@ class ComputationBuilder { // The operand must represent a constant value, which in this case // means that it must not statically depend on any parameter of the // computation that is being built other then the ones specified on the - // paramtere list. The parameters in the list will be indexed by their + // parameter list. The parameters in the list will be indexed by their // parameter id property so the number of parameters specified should be at // least as many as the largest used parameter index. // diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h index 7a550d6f73..badc629a11 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h @@ -56,7 +56,7 @@ class BatchFeatures { *num_sparse_int_features = sparse_int_feature_columns_.size(); if (*num_dense_float_features == 0 && *num_sparse_float_features == 0 && *num_sparse_int_features == 0) { - return errors::FailedPrecondition("Not intialized yet."); + return errors::FailedPrecondition("Not initialized yet."); } return Status::OK(); } diff --git a/tensorflow/core/grappler/costs/virtual_placer.h b/tensorflow/core/grappler/costs/virtual_placer.h index 7ccb1ebb99..fee5ce0f51 100644 --- a/tensorflow/core/grappler/costs/virtual_placer.h +++ b/tensorflow/core/grappler/costs/virtual_placer.h @@ -41,7 +41,7 @@ class VirtualPlacer { private: // Converts given device name to Lowercase Fully-Qualified Name (LFQN) string. // This helps us disambiguate device names internally and simplify matching. - // If device_name couldn't be parsed succesfully, returns empty string. 
+ // If device_name couldn't be parsed successfully, returns empty string. string to_lfqn_or_empty(const string& device_name) const; // Map based on the cluster info: cluster device name -> device properties. diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index c3d7611ad4..a41fa7df25 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -29,7 +29,7 @@ bool WarnedThatSetIsNotSequence = false; // Returns 1 if `o` is considered a sequence for the purposes of Flatten(). // Returns 0 otherwise. -// Returns -1 if an error occured. +// Returns -1 if an error occurred. int IsSequenceHelper(PyObject* o) { if (PyDict_Check(o)) return true; if (PySet_Check(o) && !WarnedThatSetIsNotSequence) { -- GitLab From 95ed2e833abd80727164270fdc299e99ab86ffaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 13 Nov 2017 13:35:14 +0800 Subject: [PATCH 0023/1225] TST: add test case --- .../python/kernel_tests/lookup_ops_test.py | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index 76c790a0a2..11778d8ddb 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -573,15 +573,19 @@ class IndexToStringTableFromFileTest(test.TestCase): return vocabulary_file def test_index_to_string_table(self): - vocabulary_file = self._createVocabFile("i2f_vocab1.txt") - with self.test_session(): - table = lookup_ops.index_to_string_table_from_file( - vocabulary_file=vocabulary_file) - features = table.lookup(constant_op.constant([0, 1, 2, 3], dtypes.int64)) - self.assertRaises(errors_impl.OpError, features.eval) - lookup_ops.tables_initializer().run() - self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"), - features.eval()) + vocabulary_path = self._createVocabFile("i2f_vocab1.txt") + # 
vocabulary_file supports string and tensor + type_funcs = [str, constant_op.constant] + for type_func in type_funcs: + vocabulary_file = type_func(vocabulary_path) + with self.test_session(): + table = lookup_ops.index_to_string_table_from_file( + vocabulary_file=vocabulary_file) + features = table.lookup(constant_op.constant([0, 1, 2, 3], dtypes.int64)) + self.assertRaises(errors_impl.OpError, features.eval) + lookup_ops.tables_initializer().run() + self.assertAllEqual((b"brain", b"salad", b"surgery", b"UNK"), + features.eval()) def test_index_to_string_table_with_default_value(self): default_value = b"NONE" -- GitLab From 603a2f3db38753cb4281f367f413e8c1975835f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 13 Nov 2017 13:37:00 +0800 Subject: [PATCH 0024/1225] BUG: don't check tensor --- tensorflow/python/ops/lookup_ops.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index fa58ffc37e..10b7cd7001 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -1123,8 +1123,10 @@ def index_to_string_table_from_file(vocabulary_file, ValueError: when `vocabulary_file` is empty. ValueError: when `vocab_size` is invalid. """ - if not vocabulary_file: - raise ValueError("vocabulary_file must be specified.") + if vocabulary_file is None or ( + isinstance(vocabulary_file, str) and not vocabulary_file): + raise ValueError("vocabulary_file must be specified and must not be empty.") + if vocab_size is not None and vocab_size < 1: raise ValueError("vocab_size must be greater than 0, got %d." 
% vocab_size) -- GitLab From bd1074ab5d2bc87d4fc37e9f6941dc138a3fb961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 13 Nov 2017 13:39:33 +0800 Subject: [PATCH 0025/1225] DOC: add docment --- tensorflow/python/ops/lookup_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index 10b7cd7001..cb5e9d08c0 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -1110,7 +1110,7 @@ def index_to_string_table_from_file(vocabulary_file, ``` Args: - vocabulary_file: The vocabulary filename. + vocabulary_file: The vocabulary filename, may be a constant scalar `Tensor`. vocab_size: Number of the elements in the vocabulary, if known. default_value: The value to use for out-of-vocabulary indices. name: A name for this op (optional). -- GitLab From 9e966e9e540d245950dcfccdb982304dac740294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Mon, 13 Nov 2017 13:45:21 +0800 Subject: [PATCH 0026/1225] ENH: use six.string_types --- tensorflow/python/ops/lookup_ops.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index cb5e9d08c0..c489a8ab6b 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -20,6 +20,7 @@ from __future__ import print_function import collections import functools +import six from tensorflow.python.eager import context from tensorflow.python.framework import constant_op @@ -922,7 +923,7 @@ def index_table_from_file(vocabulary_file=None, than zero. 
""" if vocabulary_file is None or ( - isinstance(vocabulary_file, str) and not vocabulary_file): + isinstance(vocabulary_file, six.string_types) and not vocabulary_file): raise ValueError("vocabulary_file must be specified and must not be empty.") if num_oov_buckets < 0: raise ValueError("num_oov_buckets must be greater or equal than 0, got %d." @@ -1124,7 +1125,7 @@ def index_to_string_table_from_file(vocabulary_file, ValueError: when `vocab_size` is invalid. """ if vocabulary_file is None or ( - isinstance(vocabulary_file, str) and not vocabulary_file): + isinstance(vocabulary_file, six.string_types) and not vocabulary_file): raise ValueError("vocabulary_file must be specified and must not be empty.") if vocab_size is not None and vocab_size < 1: -- GitLab From cedb85f2cbda30b9dada94930af9ba40bbbdcf86 Mon Sep 17 00:00:00 2001 From: TTrapper Date: Tue, 14 Nov 2017 12:41:15 -0400 Subject: [PATCH 0027/1225] Removing labels_as_indices logic from _compute_sampled_logits. Now computing 0-index labels in sampled_sparse_softmax_loss. --- .../contrib/nn/python/ops/sampling_ops.py | 7 ++-- tensorflow/python/ops/nn_impl.py | 33 +++++++------------ 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py index b26da52f01..02aa1efc5a 100644 --- a/tensorflow/contrib/nn/python/ops/sampling_ops.py +++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py @@ -318,7 +318,7 @@ def sampled_sparse_softmax_loss(weights, A `batch_size` 1-D tensor of per-example sampled softmax losses. """ - logits, labels = nn_impl._compute_sampled_logits( + logits, _ = nn_impl._compute_sampled_logits( weights=weights, biases=biases, labels=labels, @@ -330,9 +330,12 @@ def sampled_sparse_softmax_loss(weights, subtract_log_q=True, remove_accidental_hits=remove_accidental_hits, partition_strategy=partition_strategy, - labels_as_indices=True, name=name) + # There is only one true label. 
_compute_sampled_logits puts the true logit + # at index 0. + labels = tf.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64) + sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits( labels=array_ops.squeeze(labels), logits=logits) # sampled_losses is a [batch_size] tensor. diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 8e64259143..2bf5514c64 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -894,7 +894,6 @@ def _compute_sampled_logits(weights, subtract_log_q=True, remove_accidental_hits=False, partition_strategy="mod", - labels_as_indices=False, name=None): """Helper function for nce_loss and sampled_softmax_loss functions. @@ -932,18 +931,13 @@ def _compute_sampled_logits(weights, partition_strategy: A string specifying the partitioning strategy, relevant if `len(weights) > 1`. Currently `"div"` and `"mod"` are supported. Default is `"mod"`. See `tf.nn.embedding_lookup` for more details. - labels_as_indices: A `bool`. Whether the returned labels represent the - indices of the true classes. Default is `False`. name: A name for the operation (optional). Returns: out_logits: `Tensor` object with shape `[batch_size, num_true + num_sampled]`, for passing to either `nn.sigmoid_cross_entropy_with_logits` (NCE) or `nn.softmax_cross_entropy_with_logits` (sampled softmax). - out_labels: If `labels_as_indices` is `False`, a Tensor object with the same - shape as `out_logits`. Otherwise a `Tensor` of shape - `[batch_size, num_true]` with the indices of the target classes for each - row of `out_logits`. + out_labels: A Tensor object with the same shape as `out_logits`. """ if isinstance(weights, variables.PartitionedVariable): @@ -1054,21 +1048,16 @@ def _compute_sampled_logits(weights, # Construct output logits and labels. The true labels/logits start at col 0. 
out_logits = array_ops.concat([true_logits, sampled_logits], 1) - if labels_as_indices: - # We want each row of labels to be the indices of the targets, which - # start at col 0 and end at col num_true-1. - out_labels = gen_array_ops.tile( - [math_ops.range(num_true)], [array_ops.shape(true_logits)[0], 1]) - else: - # true_logits is a float tensor, ones_like(true_logits) is a float - # tensor of ones. We then divide by num_true to ensure the per-example - # labels sum to 1.0, i.e. form a proper probability distribution. - out_labels = array_ops.concat([ - array_ops.ones_like(true_logits) / num_true, - array_ops.zeros_like(sampled_logits) - ], 1) - - return out_logits, out_labels + + # true_logits is a float tensor, ones_like(true_logits) is a float + # tensor of ones. We then divide by num_true to ensure the per-example + # labels sum to 1.0, i.e. form a proper probability distribution. + out_labels = array_ops.concat([ + array_ops.ones_like(true_logits) / num_true, + array_ops.zeros_like(sampled_logits) + ], 1) + + return out_logits, out_labels def nce_loss(weights, -- GitLab From 7ba5810c105640f218993d989142d7e91da6703e Mon Sep 17 00:00:00 2001 From: TTrapper Date: Tue, 14 Nov 2017 13:48:29 -0400 Subject: [PATCH 0028/1225] calling array_ops instead of erroneus tf --- tensorflow/contrib/nn/python/ops/sampling_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/nn/python/ops/sampling_ops.py b/tensorflow/contrib/nn/python/ops/sampling_ops.py index 02aa1efc5a..ca719ccaf3 100644 --- a/tensorflow/contrib/nn/python/ops/sampling_ops.py +++ b/tensorflow/contrib/nn/python/ops/sampling_ops.py @@ -334,7 +334,7 @@ def sampled_sparse_softmax_loss(weights, # There is only one true label. _compute_sampled_logits puts the true logit # at index 0. 
- labels = tf.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64) + labels = array_ops.zeros([array_ops.shape(logits)[0], 1], dtype=dtypes.int64) sampled_losses = nn_ops.sparse_softmax_cross_entropy_with_logits( labels=array_ops.squeeze(labels), logits=logits) -- GitLab From d43d00be13ff271eb8a2e6a14eb7ac01a51934ff Mon Sep 17 00:00:00 2001 From: dariavel Date: Thu, 16 Nov 2017 17:12:06 +0200 Subject: [PATCH 0029/1225] Renaming and comment fix Signed-off-by: dariavel --- tensorflow/contrib/verbs/rdma.cc | 6 +++--- tensorflow/contrib/verbs/rdma.h | 6 +++--- tensorflow/contrib/verbs/rdma_mgr.cc | 2 +- tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 1fa98a1f01..59bc65f937 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -569,7 +569,7 @@ int RdmaChannel::PingPostRecv() { memset(&wr, 0, sizeof(wr)); wr.sg_list = &ping_sge_list_; wr.num_sge = 1; - wr.wr_id = PingRecvWrid; + wr.wr_id = kPingRecvWrid; return ibv_post_recv(qp_, &wr, &bad_wr); } @@ -592,13 +592,13 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, struct ibv_sge list; - mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, PingBuffSize, + mr_ = ibv_reg_mr(adapter_->pd_, ping_buff_, kPingBuffSize, IBV_ACCESS_LOCAL_WRITE); CHECK(mr_) << "Failed to register memory region"; memset(&list, 0, sizeof(list)); list.addr = (uintptr_t)ping_buff_; - list.length = PingBuffSize; + list.length = kPingBuffSize; list.lkey = mr_->lkey; ping_sge_list_ = list; diff --git a/tensorflow/contrib/verbs/rdma.h b/tensorflow/contrib/verbs/rdma.h index 92391d6a57..fea2327d77 100644 --- a/tensorflow/contrib/verbs/rdma.h +++ b/tensorflow/contrib/verbs/rdma.h @@ -162,11 +162,11 @@ class RdmaChannel { void RemoveRecvCallback(const string& key); void RunRecvCallback(const string& key); static const int kNumMessageBuffers = 4; - static 
const int PingRecvWrid = 0; + static const int kPingRecvWrid = 0; private: - static const int PingBuffSize = 1024; - char ping_buff_[PingBuffSize]; + static const int kPingBuffSize = 1024; + char ping_buff_[kPingBuffSize]; struct ibv_mr* mr_; struct ibv_sge ping_sge_list_; int PingPostRecv(); diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc index e7df0528b5..9cb307bcfa 100644 --- a/tensorflow/contrib/verbs/rdma_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_mgr.cc @@ -143,7 +143,7 @@ bool RdmaMgr::ConnectivityCheck() { for (i = 0; i < ne; ++i) { ibv_wc_status s = rdma_adapter_->wc_[i].status; // recv complete - if ((int)rdma_adapter_->wc_[i].wr_id == RdmaChannel::PingRecvWrid) { + if ((int)rdma_adapter_->wc_[i].wr_id == RdmaChannel::kPingRecvWrid) { CHECK(s == IBV_WC_SUCCESS) << ": " << ibv_wc_status_str( rdma_adapter_->wc_[i].status) << "(" << rdma_adapter_->wc_[i].status diff --git a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc index dbb3d25f45..74f6681af3 100644 --- a/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc +++ b/tensorflow/contrib/verbs/rdma_rendezvous_mgr.cc @@ -63,7 +63,7 @@ void RdmaRemoteRendezvous::RecvFromRemoteAsync( &unused) || !DeviceNameUtils::SplitDeviceName(parsed.dst_device, &dst_name, &unused)) { - s = errors::Internal("Could not parse src name."); + s = errors::Internal("Could not parse src or dst name."); } if (!s.ok()) { LOG(ERROR) << "s is not ok, error code " << s.error_message(); -- GitLab From 1a63168ff0196f1579a1f6b4cfae2d65f1e7c04e Mon Sep 17 00:00:00 2001 From: Dave MacLachlan Date: Thu, 16 Nov 2017 15:05:58 -0800 Subject: [PATCH 0030/1225] Add LICENSES to gitignore Update gitignore file for ios to cover the license files that get installed following the install instructions. 
--- tensorflow/examples/ios/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/examples/ios/.gitignore b/tensorflow/examples/ios/.gitignore index e572b3012c..dbabfb33bf 100644 --- a/tensorflow/examples/ios/.gitignore +++ b/tensorflow/examples/ios/.gitignore @@ -2,3 +2,6 @@ project.xcworkspace xcuserdata imagenet_comp_graph_label_strings.txt tensorflow_inception_graph.pb +simple/data/LICENSE +camera/data/LICENSE +benchmark/data/LICENSE -- GitLab From 0f9a9c854f7dfee904c4e88130cc496ec9f2611e Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Thu, 16 Nov 2017 18:53:42 -0500 Subject: [PATCH 0031/1225] Use get_or_create_global_step --- tensorflow/contrib/training/python/training/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/training/python/training/training.py b/tensorflow/contrib/training/python/training/training.py index 59f02fa38f..8e0139bdd6 100644 --- a/tensorflow/contrib/training/python/training/training.py +++ b/tensorflow/contrib/training/python/training/training.py @@ -410,7 +410,7 @@ def create_train_op(total_loss, loss value. """ if global_step is _USE_GLOBAL_STEP: - global_step = training_util.get_global_step() + global_step = training_util.get_or_create_global_step() # Update ops use GraphKeys.UPDATE_OPS collection if update_ops is None. global_update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) -- GitLab From 55ee41a98d50e200eda314ebf08f092000477f6e Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Thu, 16 Nov 2017 15:54:17 -0800 Subject: [PATCH 0032/1225] When constructing fusion computations from a proto, do not uniquify the names. The names are already unique and uniquifying them again will mutate them resulting in inconsistent names between the proto and the constructed HLO. 
PiperOrigin-RevId: 176035108 --- .../compiler/xla/service/hlo_computation.cc | 12 ++++---- .../compiler/xla/service/hlo_computation.h | 12 +++++--- .../compiler/xla/service/hlo_instruction.cc | 28 +++++++++++-------- .../compiler/xla/service/hlo_instruction.h | 11 ++++++-- tensorflow/compiler/xla/service/hlo_module.cc | 13 +++++++-- 5 files changed, 49 insertions(+), 27 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 8056bcf0f7..c215cc48d6 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -407,16 +407,18 @@ HloComputationProto HloComputation::ToProto() const { /* static */ StatusOr> HloComputation::CreateFromProto( HloModule* module, const HloComputationProto& proto, - tensorflow::gtl::FlatMap* computation_map, + const tensorflow::gtl::FlatMap& computation_map, + const std::function)>& + add_fused_computation, HloInstruction* fusion_instruction) { std::vector> instructions; tensorflow::gtl::FlatMap instruction_map; int64 parameter_count = 0; for (const HloInstructionProto& instruction_proto : proto.instructions()) { - TF_ASSIGN_OR_RETURN( - std::unique_ptr instruction, - HloInstruction::CreateFromProto(module, instruction_proto, - instruction_map, computation_map)); + TF_ASSIGN_OR_RETURN(std::unique_ptr instruction, + HloInstruction::CreateFromProto( + module, instruction_proto, instruction_map, + computation_map, add_fused_computation)); if (instruction->opcode() == HloOpcode::kParameter) { parameter_count++; } diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 2835dbbb84..353b30bc69 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -152,12 +152,16 @@ class HloComputation { // computation_map: a map from computation name to HloComputation*. 
This map // must contain all computations which the newly constructed computation // calls. - // fusion_instruction: if non-null then the newly created computation will be - // constructed as a fused computation with this instruction as its fusion - // parent. + // add_fused_computation: A function to call to add a fused + // computation. Used only when the instruction is a fusion instruction. + // fusion_instruction: if non-null then the newly created computation will + // be constructed as a fused computation with this instruction as its + // fusion parent. static StatusOr> CreateFromProto( HloModule* module, const HloComputationProto& proto, - tensorflow::gtl::FlatMap* computation_map, + const tensorflow::gtl::FlatMap& computation_map, + const std::function)>& + add_fused_computation, HloInstruction* fusion_instruction = nullptr); // Gets the instructions in this computation. diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index c35ca1eb99..c046b6d9c8 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -52,7 +52,9 @@ using ::tensorflow::strings::StrCat; StatusOr> HloInstruction::CreateFromProto( HloModule* module, const HloInstructionProto& proto, const tensorflow::gtl::FlatMap& instruction_map, - tensorflow::gtl::FlatMap* computation_map) { + const tensorflow::gtl::FlatMap& computation_map, + const std::function)>& + add_fused_computation) { TF_RET_CHECK(!proto.opcode().empty()); TF_ASSIGN_OR_RETURN(HloOpcode opcode, StringToHloOpcode(proto.opcode())); TF_RET_CHECK(proto.has_shape()); @@ -78,19 +80,19 @@ StatusOr> HloInstruction::CreateFromProto( TF_RET_CHECK(!proto.fusion_kind().empty()); TF_ASSIGN_OR_RETURN(instruction->fusion_kind_, StringToFusionKind(proto.fusion_kind())); - TF_ASSIGN_OR_RETURN( - std::unique_ptr fused_computation, - HloComputation::CreateFromProto( - module, proto.fused_instructions_computation(), 
computation_map, - /*fusion_instruction=*/instruction.get())); - instruction->called_computations_.push_back( - module->AddEmbeddedComputation(std::move(fused_computation))); + TF_ASSIGN_OR_RETURN(std::unique_ptr fused_computation, + HloComputation::CreateFromProto( + module, proto.fused_instructions_computation(), + computation_map, add_fused_computation, + /*fusion_instruction=*/instruction.get())); + instruction->called_computations_.push_back(fused_computation.get()); + add_fused_computation(std::move(fused_computation)); } else { for (const string& computation_name : proto.called_computation_names()) { - TF_RET_CHECK(ContainsKey(*computation_map, computation_name)) + TF_RET_CHECK(ContainsKey(computation_map, computation_name)) << "No computation named " << computation_name; instruction->called_computations_.push_back( - computation_map->at(computation_name)); + computation_map.at(computation_name)); } } @@ -2076,8 +2078,10 @@ string HloInstruction::ToCategory() const { bool saw_rank_1 = false; bool saw_higher_rank = false; for (const auto* operand : operands()) { - saw_rank_1 |= ShapeUtil::Rank(operand->shape()) == 1; - saw_higher_rank |= ShapeUtil::Rank(operand->shape()) > 1; + if (!ShapeUtil::IsTuple(operand->shape())) { + saw_rank_1 |= ShapeUtil::Rank(operand->shape()) == 1; + saw_higher_rank |= ShapeUtil::Rank(operand->shape()) > 1; + } } if (saw_rank_1 && saw_higher_rank) { return "rank-1-broadcast binary fusion"; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 6b2762ff14..8c6449d73b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -44,6 +44,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/gtl/iterator_range.h" #include "tensorflow/core/platform/logging.h" @@ -83,12 +84,16 @@ class HloInstruction { // must contain all operands of the newly constructed instruction. // computation_map: a map from computation name to HloComputation*. This map // must contain all computations which the newly constructed instruction - // calls. If the instruction is a fusion instruction, then the fusion - // computation is added to this map and the module. + // calls. + // add_fused_computation: A function to call to add a fused + // computation. Used (clearly) when the instruction is a fusion + // instruction. static StatusOr> CreateFromProto( HloModule* module, const HloInstructionProto& proto, const tensorflow::gtl::FlatMap& instruction_map, - tensorflow::gtl::FlatMap* computation_map); + const tensorflow::gtl::FlatMap& computation_map, + const std::function)>& + add_fused_computation); // Creates a parameter-retrieving instruction. 
static std::unique_ptr CreateParameter(int64 parameter_number, diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index d9c223fbba..faaf73ea1c 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -290,9 +290,16 @@ StatusOr> HloModule::CreateFromProto( tensorflow::gtl::FlatMap computation_map; for (const HloComputationProto& computation_proto : proto.computations()) { - TF_ASSIGN_OR_RETURN(std::unique_ptr computation, - HloComputation::CreateFromProto( - module.get(), computation_proto, &computation_map)); + TF_ASSIGN_OR_RETURN( + std::unique_ptr computation, + HloComputation::CreateFromProto( + module.get(), computation_proto, computation_map, + /*add_fused_computation=*/ + [&module](std::unique_ptr fused_computation) { + module->AddComputationInternal(std::move(fused_computation), + /*is_entry=*/false, + /*uniquify_names=*/false); + })); CHECK_NE(computation.get(), nullptr); TF_RET_CHECK(!ContainsKey(computation_map, computation->name())); string computation_name = computation->name(); -- GitLab From 9a72855893a7ca2832a08e1c5c4060f8674e0c7d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 16:10:25 -0800 Subject: [PATCH 0033/1225] Update fuse_op to eliminate duplicate nodes being created in the graph when injecting artificial dependency to the fused op. 
PiperOrigin-RevId: 176037465 --- .../framework/python/framework/graph_util.py | 2 +- .../python/framework/graph_util_test.py | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/framework/python/framework/graph_util.py b/tensorflow/contrib/framework/python/framework/graph_util.py index 8ab8711db4..9ba9c77b92 100644 --- a/tensorflow/contrib/framework/python/framework/graph_util.py +++ b/tensorflow/contrib/framework/python/framework/graph_util.py @@ -91,7 +91,7 @@ def fuse_op(graph_def, input_nodes, output_nodes, output_dtypes, (n, cur_node)) if cur_node not in input_nodes_set: next_to_visit += name_to_input_name[cur_node] - else: + elif n not in reachable_by_input: nodes_post_output.append(n) # Add all nodes upto the input nodes diff --git a/tensorflow/contrib/framework/python/framework/graph_util_test.py b/tensorflow/contrib/framework/python/framework/graph_util_test.py index 87b992e22e..0c531fb290 100644 --- a/tensorflow/contrib/framework/python/framework/graph_util_test.py +++ b/tensorflow/contrib/framework/python/framework/graph_util_test.py @@ -56,6 +56,30 @@ class GraphUtilTest(test.TestCase): self.assertEqual(fused_graph_def.node[2].name, 'D') self.assertEqual(fused_graph_def.node[3].name, 'E') + def testGraphUtilArtificialDependencyInjection(self): + graph_def = graph_pb2.GraphDef() + node_a = GetNewNode('A', 'Placeholder', []) + node_a1 = GetNewNode('A1', 'Placeholder', []) + node_b = GetNewNode('B', 'Op1', ['A']) + node_c = GetNewNode('C', 'Op1', ['B']) + node_d = GetNewNode('D', 'Op1', ['C']) + node_e = GetNewNode('E', 'Op1', ['D']) + graph_def.node.extend([node_a, node_a1, node_b, node_c, node_d, node_e]) + fused_graph_def = graph_util.fuse_op(graph_def, ['A', 'A1'], ['D'], + [types_pb2.DT_FLOAT], True, 'FusedOp', + 'Op2') + self.assertEqual(len(fused_graph_def.node), 5) + self.assertEqual(fused_graph_def.node[0].name, 'A') + self.assertEqual(fused_graph_def.node[1].name, 'A1') + 
self.assertEqual(fused_graph_def.node[2].name, 'FusedOp') + self.assertEqual(fused_graph_def.node[2].input[0], 'A') + self.assertEqual(fused_graph_def.node[2].op, 'Op2') + self.assertEqual(fused_graph_def.node[2].attr['_output_quantized'].b, True) + self.assertEqual(fused_graph_def.node[2].attr['_output_types'].list.type, + [types_pb2.DT_FLOAT]) + self.assertEqual(fused_graph_def.node[3].name, 'D') + self.assertEqual(fused_graph_def.node[4].name, 'E') + if __name__ == '__main__': test.main() -- GitLab From 780c64e3e872269e76efa27b5bb7fe2465c26dfe Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Thu, 16 Nov 2017 18:23:32 -0800 Subject: [PATCH 0034/1225] Turn off graph optimization in max pooling test because of the inconsistent behavior on handling NaN and -Inf in different MaxPooling implementations. Split the tests as ConfigProto could interfere with each other. PiperOrigin-RevId: 176054079 --- .../python/kernel_tests/pooling_ops_test.py | 76 ++++++++++++++----- 1 file changed, 59 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index c699d50c02..30c777d12f 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -20,6 +20,8 @@ from __future__ import print_function import numpy as np +from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl @@ -1172,12 +1174,27 @@ class PoolingTest(test.TestCase): [1, window_rows, window_cols, 1], [1, row_stride, col_stride, 1], padding) - def _testMaxPoolGradDirect(self, input_data, output_backprop, - expected_input_backprop, input_sizes, output_sizes, - window_rows, window_cols, row_stride, col_stride, - padding, use_gpu, v2): + def _testMaxPoolGradDirect(self, + 
input_data, + output_backprop, + expected_input_backprop, + input_sizes, + output_sizes, + window_rows, + window_cols, + row_stride, + col_stride, + padding, + use_gpu, + v2, + graph_optimization=False): pool_func = gen_nn_ops._max_pool_v2 if v2 else nn_ops.max_pool - with self.test_session(use_gpu=use_gpu): + + config = config_pb2.ConfigProto() + if graph_optimization: + config.graph_options.rewrite_options.layout_optimizer = ( + rewriter_config_pb2.RewriterConfig.ON) + with self.test_session(use_gpu=use_gpu, config=config): input_tensor = constant_op.constant(input_data, shape=input_sizes) output_tensor = pool_func(input_tensor, [1, window_rows, window_cols, 1], @@ -1314,7 +1331,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu, v2=v2) - def _testMaxPoolGradDirectWithNans2_1(self): + def _testMaxPoolGradDirectWithNans2_1CPU(self): input_data = [float("nan")] * 16 output_backprop = [11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 19.0, 20.0, 21.0] # Test the CPU implementation, which propagates diffs in case of NaN @@ -1337,11 +1354,23 @@ class PoolingTest(test.TestCase): use_gpu=False, v2=v2) + def _testMaxPoolGradDirectWithNans2_1GPU(self): if not test.is_gpu_available(): return - - # Test the GPU implementation that uses cudnn for now. - # It does not propagate the diff in cases of NaNs + input_data = [float("nan")] * 16 + output_backprop = [11.0, 12.0, 13.0, 15.0, 16.0, 17.0, 19.0, 20.0, 21.0] + # (1) For the NHWC format (used by default below), TensorFlow currently uses + # custom MaxPoolingNoMask for the forward op, cuDNN for the gradient op. + # With NaNs as input, MaxPoolingNoMask outputs -Inf, which is then fed into + # the gradient op. The cuDNN gradient op currently doesn't propagate the + # diff if input is -Inf and as a result outputs zeros. + # (2) For the NCHW format, TensorFlow currently uses + # cuDNN for both the forward and the gradient op. With NaNs as input, cuDNN + # forward op outputs NaNs, which is then fed into cuDNN gradient op. 
cuDNN + # gradient op is able to propagate NaNs and as a result the output is the + # same as expected_input_backprop_tf_cpu. + # We turn off graph optimization (layout optimizer) as the behavior of the + # above two cases differs. expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 @@ -1359,9 +1388,10 @@ class PoolingTest(test.TestCase): col_stride=1, padding="VALID", use_gpu=True, - v2=v2) + v2=v2, + graph_optimization=False) - def _testMaxPoolGradDirectWithNans2_2(self): + def _testMaxPoolGradDirectWithNans2_2CPU(self): input_data = [float("nan")] * 16 output_backprop = [ float("nan"), 12.0, 13.0, 15.0, float("nan"), 17.0, 19.0, 20.0, @@ -1387,11 +1417,16 @@ class PoolingTest(test.TestCase): use_gpu=False, v2=v2) + def _testMaxPoolGradDirectWithNans2_2GPU(self): if not test.is_gpu_available(): return - - # Test the GPU implementation that uses cudnn for now. - # It does not propagate the diff in cases of NaNs + input_data = [float("nan")] * 16 + output_backprop = [ + float("nan"), 12.0, 13.0, 15.0, + float("nan"), 17.0, 19.0, 20.0, + float("nan") + ] + # See the corresponding comment in _testMaxPoolGradDirectWithNans2_1GPU(). 
expected_input_backprop_cudnn = [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 @@ -1409,14 +1444,21 @@ class PoolingTest(test.TestCase): col_stride=1, padding="VALID", use_gpu=True, - v2=v2) + v2=v2, + graph_optimization=False) def testMaxPoolGradDirect(self): self._testMaxPoolGradDirect1_1() self._testMaxPoolGradDirect1_2() self._testMaxPoolGradDirect1_3() - self._testMaxPoolGradDirectWithNans2_1() - self._testMaxPoolGradDirectWithNans2_2() + self._testMaxPoolGradDirectWithNans2_1CPU() + self._testMaxPoolGradDirectWithNans2_2CPU() + + def testMaxPoolGradDirectNans2_1GPU(self): + self._testMaxPoolGradDirectWithNans2_1GPU() + + def testMaxPoolGradDirectNans2_2GPU(self): + self._testMaxPoolGradDirectWithNans2_2GPU() def _testMaxPoolGradGradValidPadding1_1(self, data_format, use_gpu): for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: -- GitLab From 0beff6bd1342f399173fc4e9d0e79afa3c54503b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 18:27:53 -0800 Subject: [PATCH 0035/1225] [tpu:profiler] Add run environment to TfOpStats. PiperOrigin-RevId: 176054460 --- .../contrib/tpu/profiler/tf_op_stats.proto | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto index 2d2207a43f..6943ff5f47 100644 --- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto +++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto @@ -104,6 +104,8 @@ message HloExtraInfoResult { optional string category = 1; // The long name of the HLO that includes the dimensions. optional string long_name = 2; + // The per-TPU-core batch size inferred from this HLO. + optional int64 per_core_batch_size = 3; } // Result proto for HloExtraInfoMap. @@ -112,6 +114,20 @@ message HloExtraInfoMapResult { map hlo_extrainfo_map = 1; } +// Result proto for RunEnvironment (the run environment of a profiling session). 
+message RunEnvironmentResult { + // Number of hosts used. + optional int32 host_count = 1; + // The type of TPU used. + optional string tpu_type = 2; + // The number of TPU cores used. + optional int32 tpu_core_count = 3; + // The per-TPU-core batch size. + optional int32 per_core_batch_size = 4; + // Job information including build target and command line. + optional string job_info = 5; +} + // Result proto for TfStatsHelper. message TfOpStats { // The result for the TF-metric database. @@ -126,4 +142,6 @@ message TfOpStats { optional HloExtraInfoMapResult hlo_extrainfo_map = 5; // Overall matrix unit utilization in percentage. optional double matrix_unit_utilization_percent = 6; + // The run environment of this profiling session. + optional RunEnvironmentResult run_environment = 7; } -- GitLab From 466040ca83a29d9842c4f44b56f51e99a16083dc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 18:39:43 -0800 Subject: [PATCH 0036/1225] Renaming feature_id to dimension_id in dense float split PiperOrigin-RevId: 176055428 --- .../kernels/split_handler_ops.cc | 4 ++-- .../boosted_trees/lib/trees/decision_tree.cc | 24 +++++++++---------- .../lib/trees/decision_tree_test.cc | 6 ++--- .../boosted_trees/proto/tree_config.proto | 6 ++--- .../kernel_tests/prediction_ops_test.py | 2 +- .../kernel_tests/split_handler_ops_test.py | 8 +++---- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc index 3bd30d8678..5c31980359 100644 --- a/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/split_handler_ops.cc @@ -490,11 +490,11 @@ class BuildSparseInequalitySplitsOp : public BaseBuildSplitOp { } dense_split->set_feature_column(feature_column_group_id_); // Set the feature index for the best feature column. 
- const int64 best_feature_id = + const int64 best_dimension_id = bucket_ids_and_dimensions(best_element_idx, 1); const int32 best_bucket_id = bucket_ids_and_dimensions(best_element_idx, 0); - dense_split->set_feature_id(best_feature_id); + dense_split->set_dimension_id(best_dimension_id); dense_split->set_threshold(bucket_boundaries(best_bucket_id)); auto* left_child = split_info.mutable_left_child(); diff --git a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc index f8750e7191..0e5578693a 100644 --- a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc +++ b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree.cc @@ -52,13 +52,13 @@ int DecisionTree::Traverse(const DecisionTreeConfig& config, example.sparse_float_features[split.feature_column()]; // Feature id for the split when multivalent sparse float column, or 0 // by default. - const int32 feature_id = split.feature_id(); + const int32 dimension_id = split.dimension_id(); - node_id = - !sparse_feature[feature_id].has_value() || - sparse_feature[feature_id].get_value() <= split.threshold() - ? split.left_id() - : split.right_id(); + node_id = !sparse_feature[dimension_id].has_value() || + sparse_feature[dimension_id].get_value() <= + split.threshold() + ? split.left_id() + : split.right_id(); break; } case TreeNode::kSparseFloatBinarySplitDefaultRight: { @@ -68,12 +68,12 @@ int DecisionTree::Traverse(const DecisionTreeConfig& config, example.sparse_float_features[split.feature_column()]; // Feature id for the split when multivalent sparse float column, or 0 // by default. - const int32 feature_id = split.feature_id(); - node_id = - sparse_feature[feature_id].has_value() && - sparse_feature[feature_id].get_value() <= split.threshold() - ? 
split.left_id() - : split.right_id(); + const int32 dimension_id = split.dimension_id(); + node_id = sparse_feature[dimension_id].has_value() && + sparse_feature[dimension_id].get_value() <= + split.threshold() + ? split.left_id() + : split.right_id(); break; } case TreeNode::kCategoricalIdBinarySplit: { diff --git a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc index 93924d429c..58fe8e335a 100644 --- a/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/trees/decision_tree_test.cc @@ -190,7 +190,7 @@ TEST_F(DecisionTreeTest, TraverseSparseBinarySplit) { tree_config.add_nodes()->mutable_leaf(); // Split on first column - split_node->set_feature_id(0); + split_node->set_dimension_id(0); split_node->set_threshold(2.0f); // Both instances have this feature value. @@ -199,7 +199,7 @@ TEST_F(DecisionTreeTest, TraverseSparseBinarySplit) { EXPECT_EQ(1, DecisionTree::Traverse(tree_config, 0, *++example_it)); // Split on second column - split_node->set_feature_id(1); + split_node->set_dimension_id(1); split_node->set_threshold(5.0f); // First instance does not have it (default right), second does have it. @@ -208,7 +208,7 @@ TEST_F(DecisionTreeTest, TraverseSparseBinarySplit) { EXPECT_EQ(1, DecisionTree::Traverse(tree_config, 0, *++example_it)); // Split on third column - split_node->set_feature_id(2); + split_node->set_dimension_id(2); split_node->set_threshold(3.0f); example_it = example_iterable.begin(); diff --git a/tensorflow/contrib/boosted_trees/proto/tree_config.proto b/tensorflow/contrib/boosted_trees/proto/tree_config.proto index f14abf45a5..fc570c1083 100644 --- a/tensorflow/contrib/boosted_trees/proto/tree_config.proto +++ b/tensorflow/contrib/boosted_trees/proto/tree_config.proto @@ -53,9 +53,9 @@ message DenseFloatBinarySplit { // Float feature column and split threshold describing // the rule feature <= threshold. 
int32 feature_column = 1; - // If feature column is multivalent, this holds the index of the feature for - // the split. Defaults to 0. - int32 feature_id = 5; + // If feature column is multivalent, this holds the index of the dimension + // for the split. Defaults to 0. + int32 dimension_id = 5; float threshold = 2; // Node children indexing into a contiguous diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py index 9ada844601..c1acf35160 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py @@ -93,7 +93,7 @@ def _set_float_split(split, feat_col, thresh, l_id, r_id, feature_dim_id=None): split.left_id = l_id split.right_id = r_id if feature_dim_id is not None: - split.feature_id = feature_dim_id + split.dimension_id = feature_dim_id def _set_categorical_id_split(split, feat_col, feat_id, l_id, r_id): diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py index 7c2e3a3b20..28834ef55b 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/split_handler_ops_test.py @@ -240,7 +240,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): self.assertEqual(0, split_node.split.feature_column) # Sparse is one dimensional. - self.assertEqual(0, split_node.split.feature_id) + self.assertEqual(0, split_node.split.dimension_id) self.assertAllClose(0.52, split_node.split.threshold) @@ -263,7 +263,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): self.assertEqual(0, split_node.split.feature_column) # Sparse is one dimensional. 
- self.assertEqual(0, split_node.split.feature_id) + self.assertEqual(0, split_node.split.dimension_id) self.assertAllClose(0.52, split_node.split.threshold) @@ -373,7 +373,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): self.assertEqual(0, split_node.split.feature_column) # Split happened on second dimension. - self.assertEqual(1, split_node.split.feature_id) + self.assertEqual(1, split_node.split.dimension_id) self.assertAllClose(0.58, split_node.split.threshold) @@ -395,7 +395,7 @@ class SplitHandlerOpsTest(test_util.TensorFlowTestCase): self.assertAllClose([expected_right_weight], right_child.value) self.assertEqual(0, split_node.split.feature_column) - self.assertEqual(2, split_node.split.feature_id) + self.assertEqual(2, split_node.split.dimension_id) self.assertAllClose(0.6, split_node.split.threshold) -- GitLab From 0833a3646f90ebaa9d92e90f4ae6326aac13a01c Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Thu, 16 Nov 2017 18:46:19 -0800 Subject: [PATCH 0037/1225] Adds sleep before close session in TPU Estimator PiperOrigin-RevId: 176055885 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 97b2d25e0c..fe17664d7f 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -23,6 +23,8 @@ import collections from contextlib import contextmanager import copy import threading +import time + import six from six.moves import queue as Queue # pylint: disable=redefined-builtin @@ -490,11 +492,28 @@ class _InfeedThreadController(_InfeedOutfeedThreadBaseController): count += 1 except Exception: # pylint: disable=broad-except + # Close the session to avoid the main thread from hanging. 
If input + # pipeline triggers any error, the infeed thread dies but the main thread + # for TPU computation waits for the infeed enqueue forever. Close the + # Session to cancel the main thread Session.run execution. + # + # However, sleep for 2 minutes before explicit closing to give some time + # for the TPU compilation error, if any, propagating, from TPU to CPU + # host. Compilation errors should be reported by the main thread so that + # the program can be interrupted and users can take action. Due to a race + # condition, the infeed thread might see an error first. Closing the + # session here immediately would result in a session cancellation + # exception in the main thread, instead of the expected compile error. + # User code that depends on having the proper exception type will + # therefore be confused. logging.error( 'Failed running infeed, closing session.\n' - 'You may see an exception from your main session after this.', + 'You may see an exception from your main session after this. ' + 'Sleep for 2 minutes before close Session from infeed thread to ' + 'allow the main thread returning an error first, if any.', exc_info=1 ) + time.sleep(120) session.close() def join(self): -- GitLab From 6434efb9a7db19171d7a3f6e4608af0f03882267 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 16 Nov 2017 19:04:39 -0800 Subject: [PATCH 0038/1225] Use idiomatic grpc::Slice API that allows use of different backing buffer PiperOrigin-RevId: 176057178 --- .../rpc/grpc_tensor_coding.cc | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc index 5639691804..e51894b4c7 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.cc @@ -214,22 +214,13 @@ void EncodeTensorToByteBuffer(bool is_dead, const Tensor& val, if (tensor_data_is_large) { // (E) Encode tensor data, but by sharing backing store - - // TODO(vpai): Use the pure C++ ::grpc::Slice constructor that uses - // grpc_slice_new_with_user_data once TensorFlow pins a version of gRPC - // that includes https://github.com/grpc/grpc/pull/12065 - const TensorBuffer* buf = DMAHelper::buffer(&val); buf->Ref(); slices[1] = ::grpc::Slice( - grpc_slice_new_with_user_data( - const_cast(static_cast(tdata.data())), - tdata.size(), - [](void* backing) { - static_cast(backing)->Unref(); - }, - const_cast(buf)), - ::grpc::Slice::STEAL_REF); + const_cast(static_cast(tdata.data())), + tdata.size(), + [](void* backing) { static_cast(backing)->Unref(); }, + const_cast(buf)); num_slices += 1; } size_t total_bytes = 0; -- GitLab From 75775514239bbbf2916c5aa93ef2fbd29b02cb7f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 19:05:35 -0800 Subject: [PATCH 0039/1225] Hlo parser: allow empty convolution window. Window is not required for a convolution on a 2D shape. 
PiperOrigin-RevId: 176057261 --- tensorflow/compiler/xla/service/hlo_instruction.cc | 2 +- tensorflow/compiler/xla/tools/parser/hlo_parser.cc | 5 ++++- tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index c046b6d9c8..a0795a7b36 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1898,7 +1898,7 @@ std::vector HloInstruction::ExtraAttributesToString() const { if (CanHaveDimensionsField()) { extra.push_back(StrCat("dimensions={", Join(dimensions(), ","), "}")); } - if (window_ != nullptr) { + if (window_ != nullptr && window_->dimensions_size() != 0) { extra.push_back(StrCat("window={", window_util::ToString(*window_), "}")); } if (padding_config_ != nullptr) { diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index 2112b3e710..1767d712d7 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -564,13 +564,16 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, case HloOpcode::kConvolution: { optional window; optional dnums; - attrs["window"] = {/*required=*/true, AttrTy::kWindow, &window}; + attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window}; attrs["dim_labels"] = {/*required=*/true, AttrTy::kConvolutionDimensionNumbers, &dnums}; if (!ParseOperands(&operands, /*expected_size=*/2) || !ParseAttributes(attrs)) { return false; } + if (!window) { + window.emplace(); + } instruction = builder->AddInstruction(HloInstruction::CreateConvolve( shape, /*lhs=*/operands[0], /*rhs=*/operands[1], *window, *dnums)); break; diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index 
cb02ef84a9..3fbbfbdead 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -312,7 +312,7 @@ R"(HloModule ConvolveR2_module: ENTRY %ConvolveR2.v3 (input: f32[1,2], filter: f32[1,1]) -> f32[1,2] { %input = f32[1,2]{1,0} parameter(0) %filter = f32[1,1]{1,0} parameter(1) - ROOT %convolution = f32[1,2]{0,1} convolution(f32[1,2]{1,0} %input, f32[1,1]{1,0} %filter), window={size=1}, dim_labels=bf_io->bf + ROOT %convolution = f32[1,2]{0,1} convolution(f32[1,2]{1,0} %input, f32[1,1]{1,0} %filter), dim_labels=bf_io->bf } )" -- GitLab From 15907659888a3e36e8de3d5a95de8d3327cb7c46 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 16 Nov 2017 19:10:45 -0800 Subject: [PATCH 0040/1225] [tf.data] Add experimental API for gathering statistics from an Iterator. PiperOrigin-RevId: 176057576 --- .../contrib/data/python/kernel_tests/BUILD | 13 ++ .../kernel_tests/stats_dataset_ops_test.py | 213 ++++++++++++++++++ tensorflow/contrib/data/python/ops/BUILD | 1 + .../contrib/data/python/ops/stats_ops.py | 177 +++++++++++++++ tensorflow/core/kernels/BUILD | 37 +++ tensorflow/core/kernels/dataset.h | 22 +- tensorflow/core/kernels/iterator_ops.cc | 43 ++++ tensorflow/core/kernels/stats_aggregator.h | 84 +++++++ .../core/kernels/stats_aggregator_ops.cc | 108 +++++++++ tensorflow/core/kernels/stats_dataset_ops.cc | 181 +++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 47 ++++ 11 files changed, 924 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py create mode 100644 tensorflow/contrib/data/python/ops/stats_ops.py create mode 100644 tensorflow/core/kernels/stats_aggregator.h create mode 100644 tensorflow/core/kernels/stats_aggregator_ops.cc create mode 100644 tensorflow/core/kernels/stats_dataset_ops.cc diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 
c61f61263f..0dac03d7d8 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -424,6 +424,19 @@ py_test( ], ) +py_test( + name = "stats_dataset_ops_test", + size = "small", + srcs = ["stats_dataset_ops_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/ops:dataset_ops", + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:errors", + ], +) + py_test( name = "zip_dataset_op_test", size = "small", diff --git a/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py new file mode 100644 index 0000000000..8f24d6b2f6 --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/stats_dataset_ops_test.py @@ -0,0 +1,213 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the experimental input pipeline statistics gathering ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.data.python.ops import stats_ops +from tensorflow.core.framework import summary_pb2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class StatsDatasetTest(test.TestCase): + + def _assertSummaryHasCount(self, summary_str, tag, expected_value): + summary_proto = summary_pb2.Summary() + summary_proto.ParseFromString(summary_str) + for value in summary_proto.value: + if tag == value.tag: + self.assertEqual(expected_value, value.histo.num) + return + self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto)) + + def _assertSummaryHasSum(self, summary_str, tag, expected_value): + summary_proto = summary_pb2.Summary() + summary_proto.ParseFromString(summary_str) + for value in summary_proto.value: + if tag == value.tag: + self.assertEqual(expected_value, value.histo.sum) + return + self.fail("Expected tag %r not found in summary %r" % (tag, summary_proto)) + + def testBytesProduced(self): + dataset = dataset_ops.Dataset.range(100).map( + lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply( + stats_ops.bytes_produced_stats("bytes_produced")) + iterator = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) + next_element = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run([iterator.initializer, stats_aggregator_subscriber]) + expected_sum = 0.0 + for i in range(100): + 
self.assertAllEqual( + np.array([i] * i, dtype=np.int64), sess.run(next_element)) + summary_str = sess.run(summary_t) + self._assertSummaryHasCount(summary_str, "bytes_produced", float(i + 1)) + expected_sum += i * 8.0 + self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + summary_str = sess.run(summary_t) + self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0) + self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum) + + def testLatencyStats(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")) + iterator = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) + next_element = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run([iterator.initializer, stats_aggregator_subscriber]) + for i in range(100): + self.assertEqual(i, sess.run(next_element)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", float(i + 1)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0) + + def testReinitialize(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")) + iterator = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) + next_element = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run(stats_aggregator_subscriber) + for j in range(5): + sess.run(iterator.initializer) + for i in range(100): + self.assertEqual(i, sess.run(next_element)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", float((j * 100) + 
i + 1)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", (j + 1) * 100.0) + + def testNoAggregatorRegistered(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")) + iterator = dataset.make_initializable_iterator() + next_element = iterator.get_next() + + with self.test_session() as sess: + sess.run(iterator.initializer) + for i in range(100): + self.assertEqual(i, sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testMultipleTags(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")).apply( + stats_ops.latency_stats("record_latency_2")) + iterator = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) + next_element = iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run([iterator.initializer, stats_aggregator_subscriber]) + for i in range(100): + self.assertEqual(i, sess.run(next_element)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", float(i + 1)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency_2", float(i + 1)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency_2", 100.0) + + def testRepeatedTags(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")).apply( + stats_ops.latency_stats("record_latency")) + iterator = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscriber = stats_aggregator.subscribe(iterator) + next_element = 
iterator.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run([iterator.initializer, stats_aggregator_subscriber]) + for i in range(100): + self.assertEqual(i, sess.run(next_element)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", float(2 * (i + 1))) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0) + + def testMultipleIteratorsSameAggregator(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")) + iterator_0 = dataset.make_initializable_iterator() + iterator_1 = dataset.make_initializable_iterator() + stats_aggregator = stats_ops.StatsAggregator() + stats_aggregator_subscribers = [stats_aggregator.subscribe(iterator_0), + stats_aggregator.subscribe(iterator_1)] + next_element = iterator_0.get_next() + iterator_1.get_next() + summary_t = stats_aggregator.get_summary() + + with self.test_session() as sess: + sess.run([iterator_0.initializer, iterator_1.initializer, + stats_aggregator_subscribers]) + for i in range(100): + self.assertEqual(i * 2, sess.run(next_element)) + self._assertSummaryHasCount( + sess.run(summary_t), "record_latency", float(2 * (i + 1))) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0) + + def testMultipleStatsAggregatorsSameIteratorFail(self): + dataset = dataset_ops.Dataset.range(100).apply( + stats_ops.latency_stats("record_latency")) + iterator = dataset.make_initializable_iterator() + stats_aggregator_0 = stats_ops.StatsAggregator() + stats_aggregator_1 = stats_ops.StatsAggregator() + + with self.test_session() as sess: + sess.run(stats_aggregator_0.subscribe(iterator)) + # TODO(mrry): Consider making this allowable (and also allowing + # aggregators to unsubscribe). 
+ with self.assertRaises(errors.FailedPreconditionError): + sess.run(stats_aggregator_1.subscribe(iterator)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index d6aaa12f5b..86035f3a69 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -71,6 +71,7 @@ py_library( "interleave_ops.py", "resampling.py", "scan_ops.py", + "stats_ops.py", ], srcs_version = "PY2AND3", deps = [ diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py b/tensorflow/contrib/data/python/ops/stats_ops.py new file mode 100644 index 0000000000..b8875bd533 --- /dev/null +++ b/tensorflow/contrib/data/python/ops/stats_ops.py @@ -0,0 +1,177 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Experimental API for gathering statistics from `tf.data` pipelines.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.util import nest +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_dataset_ops + + +class StatsAggregator(object): + """A stateful resource that aggregates statistics from one or more iterators. + + To record statistics, use one of the custom transformation functions defined + in this module when defining your @{tf.data.Dataset}. All statistics will be + aggregated by the `StatsAggregator` that is associated with a particular + iterator (see below). For example, to record the total number of bytes + produced by iterating over a dataset: + + ```python + dataset = ... + dataset = dataset.apply(stats_ops.bytes_produced_stats("total_bytes")) + ``` + + To associate a `StatsAggregator` with a @{tf.data.Iterator} object, use + the following pattern: + + ```python + dataset = ... + iterator = dataset.make_one_shot_iterator() + stats_aggregator = stats_ops.StatsAggregator() + subscribe_op = stats_aggregator.subscribe(iterator) + + with tf.Session() as sess: + # Running `subscribe_op` will associate `iterator` with `stats_aggregator`. + sess.run(subscribe_op) + ``` + + To get a protocol buffer summary of the currently aggregated statistics, + use the `StatsAggregator.get_summary()` tensor. The easiest way to do this + is to add the returned tensor to the @{tf.GraphKeys.SUMMARIES} collection, + so that the summaries will be included with any existing summaries. 
+ + ```python + stats_aggregator = stats_ops.StatsAggregator() + stats_summary = stats_aggregator.get_summary() + tf.add_to_collection(tf.GraphKeys.SUMMARIES, stats_summary) + ``` + + Note: This interface is experimental and expected to change. In particular, + we expect to add other implementations of `StatsAggregator` that provide + different ways of exporting statistics, and add more types of statistics. + """ + + def __init__(self): + """Creates a `StatsAggregator`.""" + self._resource = gen_dataset_ops.stats_aggregator_handle() + + def get_summary(self): + """Returns a string @{tf.Tensor} that summarizes the aggregated statistics. + + The returned tensor will contain a serialized @{tf.summary.Summary} protocol + buffer, which can be used with the standard TensorBoard logging facilities. + + Returns: + A scalar string @{tf.Tensor} that summarizes the aggregated statistics. + """ + return gen_dataset_ops.stats_aggregator_summary(self._resource) + + def subscribe(self, iterator): + """Returns a @{tf.Operation} to associate this aggregator with `iterator`. + + Note: Each @{tf.data.Iterator} can be associated with at most one + `StatsAggregator`. After running the operation that this function + returns, all statistics recorded in the iteration of `iterator` + will be stored in `stats_aggregator`. + + Args: + iterator: A @{tf.data.Iterator} object. + + Returns: + A @{tf.Operation} that, when run, associates this aggregator with + `iterator`. + """ + if not isinstance(iterator, iterator_ops.Iterator): + raise TypeError("`iterator` must be a `tf.data.Iterator` object.") + return gen_dataset_ops.iterator_set_stats_aggregator( + iterator._iterator_resource, self._resource) # pylint: disable=protected-access + + +def bytes_produced_stats(tag): + """Records the number of bytes produced by each element of the input dataset. + + To consume the statistics, associate a `StatsAggregator` with an iterator + over the output dataset. + + Args: + tag: String. 
All statistics recorded by the returned transformation will + be associated with the given `tag`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.contrib.data.Dataset.apply}. + """ + + def _apply_fn(dataset): + return _StatsDataset(dataset, gen_dataset_ops.bytes_produced_stats_dataset, + tag) + + return _apply_fn + + +def latency_stats(tag): + """Records the latency of producing each element of the input dataset. + + To consume the statistics, associate a `StatsAggregator` with an iterator + over the output dataset. + + Args: + tag: String. All statistics recorded by the returned transformation will + be associated with the given `tag`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.contrib.data.Dataset.apply}. + """ + + def _apply_fn(dataset): + return _StatsDataset(dataset, gen_dataset_ops.latency_stats_dataset, tag) + + return _apply_fn + + +class _StatsDataset(dataset_ops.Dataset): + """A `Dataset` that acts as an identity, and also records statistics.""" + + def __init__(self, input_dataset, op_function, tag): + super(_StatsDataset, self).__init__() + self._input_dataset = input_dataset + self._op_function = op_function + self._tag = ops.convert_to_tensor(tag, dtype=dtypes.string) + + def _as_variant_tensor(self): + return self._op_function( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + self._tag, + output_shapes=nest.flatten(self.output_shapes), + output_types=nest.flatten(self.output_types)) + + @property + def output_shapes(self): + return self._input_dataset.output_shapes + + @property + def output_types(self): + return self._input_dataset.output_types + + @property + def output_classes(self): + return self._input_dataset.output_classes diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index b7386abdea..00cf3f90e9 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5752,6 +5752,26 @@ 
tf_mkl_kernel_library( ], ) +cc_library( + name = "stats_aggregator", + hdrs = ["stats_aggregator.h"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +tf_kernel_library( + name = "stats_aggregator_ops", + srcs = ["stats_aggregator_ops.cc"], + deps = [ + ":stats_aggregator", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + ], +) + cc_library( name = "dataset", srcs = ["dataset.cc"], @@ -5760,6 +5780,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", "//tensorflow/core/util/tensor_bundle", ], ) @@ -6032,6 +6053,19 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "stats_dataset_ops", + srcs = ["stats_dataset_ops.cc"], + deps = [ + ":dataset", + ":stats_aggregator", + "//tensorflow/core:dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + tf_kernel_library( name = "range_dataset_op", srcs = ["range_dataset_op.cc"], @@ -6157,6 +6191,7 @@ tf_kernel_library( deps = [ ":dataset", ":ops_util", + ":stats_aggregator", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", @@ -6206,6 +6241,8 @@ tf_kernel_library( ":skip_dataset_op", ":sparse_tensor_slice_dataset_op", ":sql_dataset_ops", + ":stats_aggregator_ops", + ":stats_dataset_ops", ":take_dataset_op", ":tensor_dataset_op", ":tensor_slice_dataset_op", diff --git a/tensorflow/core/kernels/dataset.h b/tensorflow/core/kernels/dataset.h index df75deacbe..c266bc07c1 100644 --- a/tensorflow/core/kernels/dataset.h +++ b/tensorflow/core/kernels/dataset.h @@ -41,8 +41,6 @@ limitations under the License. namespace tensorflow { -class ResourceMgr; - // Interface for reading values from a key-value store. // Used for restoring iterator state. 
class IteratorStateReader { @@ -308,6 +306,8 @@ class GraphDefBuilderWrapper { GraphDefBuilder* b_; }; +class StatsAggregator; + // A cut-down version of OpKernelContext for running computations in // iterators. Note that we cannot simply use OpKernelContext here // because we might run computation in an iterator whose lifetime is @@ -331,6 +331,16 @@ class IteratorContext { // Function call support. std::function<void(std::function<void()>)> runner = nullptr; + + // A function that returns the current `StatsAggregator` instance to be + // used when recording statistics about the iterator. + // + // NOTE(mrry): This is somewhat awkward, because (i) the `StatsAggregator` + // is a property of the `IteratorResource` (which this class does not know + // about), and (ii) it can change after the `IteratorContext` has been + // created. Better suggestions are welcome! + std::function<std::shared_ptr<StatsAggregator>()> stats_aggregator_getter = + nullptr; }; explicit IteratorContext(Params params) : params_(std::move(params)) {} @@ -341,6 +351,14 @@ class IteratorContext { return &params_.runner; } + std::shared_ptr<StatsAggregator> stats_aggregator() { + if (params_.stats_aggregator_getter) { + return params_.stats_aggregator_getter(); + } else { + return nullptr; + } + } + private: Params params_; }; diff --git a/tensorflow/core/kernels/iterator_ops.cc b/tensorflow/core/kernels/iterator_ops.cc index b48da5b326..439775157b 100644 --- a/tensorflow/core/kernels/iterator_ops.cc +++ b/tensorflow/core/kernels/iterator_ops.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/kernels/dataset.h" #include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/kernels/stats_aggregator.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" @@ -168,6 +169,16 @@ class IteratorResource : public ResourceBase { return Status::OK(); } + void set_stats_aggregator(std::shared_ptr stats_aggregator) { + mutex_lock l(mu_); + stats_aggregator_ = std::move(stats_aggregator); + } + + std::shared_ptr stats_aggregator() { + tf_shared_lock l(mu_); + return stats_aggregator_; + } + string DebugString() override { return "Iterator resource"; } const DataTypeVector& output_dtypes() const { return output_dtypes_; } @@ -178,6 +189,8 @@ class IteratorResource : public ResourceBase { private: std::shared_ptr iterator_; + mutex mu_; + std::shared_ptr stats_aggregator_ GUARDED_BY(mu_); const DataTypeVector output_dtypes_; const std::vector output_shapes_; const int graph_def_version_; @@ -684,6 +697,9 @@ class IteratorGetNextOp : public AsyncOpKernel { IteratorContext::Params params; params.env = ctx->env(); + params.stats_aggregator_getter = [iterator]() { + return iterator->stats_aggregator(); + }; params.runner = *(ctx->runner()); IteratorContext iter_ctx(std::move(params)); @@ -835,6 +851,31 @@ class DeserializeIteratorOp : public OpKernel { } }; +class IteratorSetStatsAggregatorOp : public OpKernel { + public: + explicit IteratorSetStatsAggregatorOp(OpKernelConstruction* ctx) + : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + IteratorResource* iterator_resource; + OP_REQUIRES_OK( + ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator_resource)); + core::ScopedUnref unref_iterator(iterator_resource); + + StatsAggregatorResource* stats_aggregator_resource; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 1), + &stats_aggregator_resource)); + 
core::ScopedUnref unref_stats_aggregator(stats_aggregator_resource); + // TODO(mrry): Consider allowing multiple StatsAggregator ops to + // subscribe to updates, and/or unsubscribing. + OP_REQUIRES(ctx, !iterator_resource->stats_aggregator(), + errors::FailedPrecondition( + "Iterator already associated with a StatsAggregator")); + iterator_resource->set_stats_aggregator( + stats_aggregator_resource->stats_aggregator()); + } +}; + REGISTER_KERNEL_BUILDER(Name("Iterator").Device(DEVICE_CPU), IteratorHandleOp); REGISTER_KERNEL_BUILDER(Name("MakeIterator").Device(DEVICE_CPU), MakeIteratorOp); @@ -852,6 +893,8 @@ REGISTER_KERNEL_BUILDER(Name("SerializeIterator").Device(DEVICE_CPU), SerializeIteratorOp); REGISTER_KERNEL_BUILDER(Name("DeserializeIterator").Device(DEVICE_CPU), DeserializeIteratorOp); +REGISTER_KERNEL_BUILDER(Name("IteratorSetStatsAggregator").Device(DEVICE_CPU), + IteratorSetStatsAggregatorOp); } // namespace diff --git a/tensorflow/core/kernels/stats_aggregator.h b/tensorflow/core/kernels/stats_aggregator.h new file mode 100644 index 0000000000..5f602c5f3b --- /dev/null +++ b/tensorflow/core/kernels/stats_aggregator.h @@ -0,0 +1,84 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_ + +#include <memory> +#include <string> + +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/lib/gtl/array_slice.h" + +namespace tensorflow { + +class Summary; + +// A `StatsAggregator` accumulates statistics incrementally. A +// `StatsAggregator` can accumulate multiple different statistics, distinguished +// by a string name. +// +// The class currently supports accumulating `Histogram` objects, and we expect +// to add other methods in future. +// +// NOTE(mrry): `StatsAggregator` is a virtual interface because we anticipate +// that many different implementations will share the same interface. For example, the +// current implementation in "stats_aggregator_ops.cc" is a simple in-memory +// implementation that integrates with the pull-based summary API, and we may +// add implementations that work with the push-based `SummaryWriterInterface`, +// as well as custom monitoring services. +class StatsAggregator { + public: + virtual ~StatsAggregator() {} + + // Add the given `values` to the histogram with the given `name`. Each + // element of `values` will be treated as a separate sample in the histogram. + virtual void AddToHistogram(const string& name, + gtl::ArraySlice<double> values) = 0; + + // Stores a protocol buffer representation of the aggregator state in the + // given `out_summary`. + // TODO(mrry): Consider separating this method from the `StatsAggregator` + // interface. It is possible that not all implementations will support + // encoding their state as a protocol buffer. + virtual void EncodeToProto(Summary* out_summary) = 0; +}; + +// A `StatsAggregatorResource` wraps a shareable `StatsAggregator` as a resource +// in the TensorFlow resource manager. 
+// +// NOTE(mrry): This class is separate from `StatsAggregator` in order to +// simplify the memory management of the shared object. Most users of +// `StatsAggregator` interact with a `std::shared_ptr` whereas +// the `ResourceBase` API requires explicit reference counting. +class StatsAggregatorResource : public ResourceBase { + public: + // Creates a new resource from the given `stats_aggregator`. + StatsAggregatorResource(std::unique_ptr stats_aggregator) + : stats_aggregator_(stats_aggregator.release()) {} + + // Returns the wrapped `StatsAggregator`. + std::shared_ptr stats_aggregator() const { + return stats_aggregator_; + } + + string DebugString() { return "StatsAggregatorResource"; } + + private: + const std::shared_ptr stats_aggregator_; +}; + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_STATS_AGGREGATOR_H_ diff --git a/tensorflow/core/kernels/stats_aggregator_ops.cc b/tensorflow/core/kernels/stats_aggregator_ops.cc new file mode 100644 index 0000000000..037ec64a83 --- /dev/null +++ b/tensorflow/core/kernels/stats_aggregator_ops.cc @@ -0,0 +1,108 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/kernels/stats_aggregator.h" + +#include + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_op_kernel.h" +#include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/lib/histogram/histogram.h" +#include "tensorflow/core/platform/macros.h" + +namespace tensorflow { +namespace { + +class StatsAggregatorImpl : public StatsAggregator { + public: + StatsAggregatorImpl() {} + + void AddToHistogram(const string& name, + gtl::ArraySlice values) override { + mutex_lock l(mu_); + histogram::Histogram& histogram = histograms_[name]; + for (double value : values) { + histogram.Add(value); + } + } + + void EncodeToProto(Summary* out_summary) override { + mutex_lock l(mu_); + for (const auto& pair : histograms_) { + const string& name = pair.first; + const histogram::Histogram& histogram = pair.second; + + Summary::Value* value = out_summary->add_value(); + value->set_tag(name); + histogram.EncodeToProto(value->mutable_histo(), + true /* preserve_zero_buckets */); + } + } + + private: + mutex mu_; + std::unordered_map histograms_ GUARDED_BY(mu_); + TF_DISALLOW_COPY_AND_ASSIGN(StatsAggregatorImpl); +}; + +class StatsAggregatorHandleOp + : public ResourceOpKernel { + public: + explicit StatsAggregatorHandleOp(OpKernelConstruction* ctx) + : ResourceOpKernel(ctx) {} + + private: + Status CreateResource(StatsAggregatorResource** ret) override + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + *ret = new StatsAggregatorResource( + std::unique_ptr(new StatsAggregatorImpl)); + return Status::OK(); + } + + Status VerifyResource(StatsAggregatorResource* resource) override { + return Status::OK(); + } +}; + +class StatsAggregatorSummaryOp : public OpKernel { + public: + explicit StatsAggregatorSummaryOp(OpKernelConstruction* ctx) + : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor& resource_handle_t = 
ctx->input(0); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(resource_handle_t.shape()), + errors::InvalidArgument("resource_handle must be a scalar")); + + StatsAggregatorResource* resource; + OP_REQUIRES_OK(ctx, + LookupResource(ctx, HandleFromInput(ctx, 0), &resource)); + core::ScopedUnref unref_iterator(resource); + + Tensor* summary_t; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &summary_t)); + Summary summary; + resource->stats_aggregator()->EncodeToProto(&summary); + summary_t->scalar()() = summary.SerializeAsString(); + } +}; + +REGISTER_KERNEL_BUILDER(Name("StatsAggregatorHandle").Device(DEVICE_CPU), + StatsAggregatorHandleOp); +REGISTER_KERNEL_BUILDER(Name("StatsAggregatorSummary").Device(DEVICE_CPU), + StatsAggregatorSummaryOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/kernels/stats_dataset_ops.cc b/tensorflow/core/kernels/stats_dataset_ops.cc new file mode 100644 index 0000000000..7b1853aba6 --- /dev/null +++ b/tensorflow/core/kernels/stats_dataset_ops.cc @@ -0,0 +1,181 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/dataset.h" +#include "tensorflow/core/kernels/stats_aggregator.h" +#include "tensorflow/core/lib/random/random.h" + +namespace tensorflow { +namespace { + +// This op defines a `Dataset` that passes through its input elements and +// records the latency of producing each element in the context's +// `StatsAggregator`. +// +// TODO(mrry): It is likely that many *StatsDatasetOp kernels will have the +// same or similar structure. We should abstract the common boilerplate into +// a base case and/or investigate how to make general-purpose *StatsDatasetOp +// kernels that use TensorFlow functions to represent their logic. For example, +// if the performance were adequate, we might replace this kernel with an +// implementation that executes functions before and after the `GetNext()` call +// on the input, each executing an op that gets the current time and performing +// the subtraction. 
+class LatencyStatsDatasetOp : public UnaryDatasetOpKernel { + public: + explicit LatencyStatsDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) {} + + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + string tag; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag)); + *output = new Dataset(input, std::move(tag)); + } + + private: + class Dataset : public DatasetBase { + public: + explicit Dataset(const DatasetBase* input, string tag) + : input_(input), tag_(std::move(tag)) { + input_->Ref(); + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::LatencyStats")})); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + const std::vector& output_shapes() const override { + return input_->output_shapes(); + } + + string DebugString() override { return "LatencyStatsDatasetOp::Dataset"; } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + uint64 start = ctx->env()->NowMicros(); + Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); + uint64 end = ctx->env()->NowMicros(); + auto stats_aggregator = ctx->stats_aggregator(); + if (stats_aggregator && !*end_of_sequence) { + ctx->stats_aggregator()->AddToHistogram( + dataset()->tag_, {static_cast(end - start)}); + } + return s; + } + + private: + const std::unique_ptr input_impl_; + }; + + const DatasetBase* const input_; + const string tag_; + }; +}; + +class BytesProducedStatsDatasetOp : public UnaryDatasetOpKernel { + public: + explicit 
BytesProducedStatsDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) {} + + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + string tag; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "tag", &tag)); + *output = new Dataset(input, std::move(tag)); + } + + private: + class Dataset : public DatasetBase { + public: + explicit Dataset(const DatasetBase* input, string tag) + : input_(input), tag_(std::move(tag)) { + input_->Ref(); + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr(new Iterator( + {this, strings::StrCat(prefix, "::BytesProducedStats")})); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + const std::vector& output_shapes() const override { + return input_->output_shapes(); + } + + string DebugString() override { + return "BytesProducedStatsDatasetOp::Dataset"; + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + Status s = input_impl_->GetNext(ctx, out_tensors, end_of_sequence); + auto stats_aggregator = ctx->stats_aggregator(); + if (stats_aggregator && s.ok() && !*end_of_sequence) { + size_t total_bytes = 0; + for (const Tensor& t : *out_tensors) { + total_bytes += t.TotalBytes(); + } + ctx->stats_aggregator()->AddToHistogram( + dataset()->tag_, {static_cast(total_bytes)}); + } + return s; + } + + private: + const std::unique_ptr input_impl_; + }; + + const DatasetBase* const input_; + const string tag_; + }; +}; + +REGISTER_KERNEL_BUILDER(Name("LatencyStatsDataset").Device(DEVICE_CPU), + LatencyStatsDatasetOp); 
+REGISTER_KERNEL_BUILDER(Name("BytesProducedStatsDataset").Device(DEVICE_CPU), + BytesProducedStatsDatasetOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index f512213964..6bf226e7a5 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -151,6 +151,28 @@ REGISTER_OP("IgnoreErrorsDataset") Creates a dataset that contains the elements of `input_dataset` ignoring errors. )doc"); +REGISTER_OP("BytesProducedStatsDataset") + .Input("input_dataset: variant") + .Input("tag: string") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Records the bytes size of each element of `input_dataset` in a StatsAggregator. +)doc"); + +REGISTER_OP("LatencyStatsDataset") + .Input("input_dataset: variant") + .Input("tag: string") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Records the latency of producing `input_dataset` elements in a StatsAggregator. +)doc"); + REGISTER_OP("MapDataset") .Input("input_dataset: variant") .Input("other_arguments: Targuments") @@ -744,4 +766,29 @@ serialized: A variant tensor storing the state of the iterator contained in the resource. )doc"); +REGISTER_OP("StatsAggregatorHandle") + .Output("handle: resource") + .SetShapeFn(shape_inference::ScalarShape) + .Attr("container: string = ''") + .Attr("shared_name: string = ''") + .Doc(R"doc( +Creates a statistics manager resource. +)doc"); + +REGISTER_OP("IteratorSetStatsAggregator") + .Input("iterator_handle: resource") + .Input("stats_aggregator_handle: resource") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Associates the given iterator with the given statistics aggregator. 
+)doc"); + +REGISTER_OP("StatsAggregatorSummary") + .Input("iterator: resource") + .Output("summary: string") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Produces a summary of any statistics recorded by the given statistics manager. +)doc"); + } // namespace tensorflow -- GitLab From 929178e1046f6387d9245c3d89ba5c3c1f3078d5 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Thu, 16 Nov 2017 19:11:55 -0800 Subject: [PATCH 0041/1225] Add documentation for how to get input names for input_fn for keras model converted estimator. PiperOrigin-RevId: 176057647 --- .../docs_src/programmers_guide/estimators.md | 26 ++++++++++++++++--- .../python/keras/_impl/keras/estimator.py | 3 +++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md index d465679817..6544a16f2b 100644 --- a/tensorflow/docs_src/programmers_guide/estimators.md +++ b/tensorflow/docs_src/programmers_guide/estimators.md @@ -166,11 +166,29 @@ keras_inception_v3 = tf.keras.applications.inception_v3.InceptionV3(weights=None keras_inception_v3.compile(optimizer=tf.keras.optimizers.SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metric='accuracy') -# Create an Estimator from the compiled Keras model. +# Create an Estimator from the compiled Keras model. Note the initial model +# state of the keras model is preserved in the created Estimator. est_inception_v3 = tf.keras.estimator.model_to_estimator(keras_model=keras_inception_v3) -# Treat the derived Estimator as you would any other Estimator. For example, -# the following derived Estimator calls the train method: -est_inception_v3.train(input_fn=my_training_set, steps=2000) + +# Treat the derived Estimator as you would with any other Estimator. 
+# First, recover the input name(s) of Keras model, so we can use them as the +# feature column name(s) of the Estimator input function: +keras_inception_v3.input_names # print out: ['input_1'] +# Once we have the input name(s), we can create the input function, for example, +# for input(s) in the format of numpy ndarray: +train_input_fn = tf.estimator.inputs.numpy_input_fn( + x={"input_1": train_data}, + y=train_labels, + num_epochs=1, + shuffle=False) +# To train, we call Estimator's train function: +est_inception_v3.train(input_fn=train_input_fn, steps=2000) ``` +Note that the names of feature columns and labels of a keras estimator come from +the corresponding compiled keras model. For example, the input key names for +@{$get_started/input_fn} in above `est_inception_v3` estimator can be obtained +from `keras_inception_v3.input_names`, and similarily, the predicted output +names can be obtained from `keras_inception_v3.output_names`. + For more details, please refer to the documentation for @{tf.keras.estimator.model_to_estimator}. diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py index 125e63e1b8..2e931769c7 100644 --- a/tensorflow/python/keras/_impl/keras/estimator.py +++ b/tensorflow/python/keras/_impl/keras/estimator.py @@ -232,6 +232,9 @@ def model_to_estimator(keras_model=None, config=None): """Constructs an `Estimator` instance from given keras model. + For usage example, please see + @{$programmers_guide/estimators$creating_estimators_from_keras_models}. + Args: keras_model: Keras model in memory. keras_model_path: Directory to a keras model on disk. 
-- GitLab From 7d17d27940aa915583b0b3e2ba77d9f708af6783 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Thu, 16 Nov 2017 19:30:05 -0800 Subject: [PATCH 0042/1225] Add WriteScalar support to SummaryDbWriter PiperOrigin-RevId: 176058700 --- tensorflow/contrib/summary/summary_ops.py | 22 ++++- .../tensorboard/db/summary_db_writer.cc | 81 ++++++++++++++----- .../tensorboard/db/summary_db_writer_test.cc | 27 +++++++ 3 files changed, 109 insertions(+), 21 deletions(-) diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index a72c0c80aa..bf810744a1 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -364,16 +364,34 @@ def generic(name, tensor, metadata=None, family=None, global_step=None): def scalar(name, tensor, family=None, global_step=None): - """Writes a scalar summary if possible.""" + """Writes a scalar summary if possible. + + Unlike @{tf.contrib.summary.generic} this op may change the dtype + depending on the writer, for both practical and efficiency concerns. + + Args: + name: An arbitrary name for this summary. + tensor: A @{tf.Tensor} Must be one of the following types: + `float32`, `float64`, `int32`, `int64`, `uint8`, `int16`, + `int8`, `uint16`, `half`, `uint32`, `uint64`. + family: Optional, the summary's family. + global_step: The `int64` monotonic step variable, which defaults + to @{tf.train.get_global_step}. + + Returns: + The created @{tf.Operation} or a @{tf.no_op} if summary writing has + not been enabled for this context. + """ if global_step is None: global_step = training_util.get_global_step() + else: + global_step = ops.convert_to_tensor(global_step, dtypes.int64) def function(tag, scope): # Note the identity to move the tensor to the CPU. 
return gen_summary_ops.write_scalar_summary( context.context().summary_writer_resource, global_step, tag, array_ops.identity(tensor), name=scope) - return summary_writer_function(name, tensor, function, family=family) diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc index ae063d24ef..857e731ef2 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer.cc @@ -81,6 +81,55 @@ Status BindTensor(SqliteStatement* stmt, int parameter, const Tensor& t) { return BindProto(stmt, parameter, p); } +// Tries to fudge shape and dtype to something with smaller storage. +Status CoerceScalar(const Tensor& t, Tensor* out) { + switch (t.dtype()) { + case DT_DOUBLE: + *out = t; + break; + case DT_INT64: + *out = t; + break; + case DT_FLOAT: + *out = {DT_DOUBLE, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_HALF: + *out = {DT_DOUBLE, {}}; + out->scalar()() = static_cast(t.scalar()()); + break; + case DT_INT32: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_INT16: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_INT8: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_UINT32: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_UINT16: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + case DT_UINT8: + *out = {DT_INT64, {}}; + out->scalar()() = t.scalar()(); + break; + default: + return errors::Unimplemented("Scalar summary for dtype ", + DataTypeString(t.dtype()), + " is not supported."); + } + return Status::OK(); +} + class Transactor { public: explicit Transactor(std::shared_ptr db) @@ -280,20 +329,23 @@ class SummaryDbWriter : public SummaryWriterInterface { insert_tensor_.BindInt(1, tag_id); insert_tensor_.BindInt(2, global_step); insert_tensor_.BindDouble(3, GetWallTime(env_)); - switch (t.dtype()) 
{ - case DT_INT64: - insert_tensor_.BindInt(4, t.scalar()()); - break; - case DT_DOUBLE: - insert_tensor_.BindDouble(4, t.scalar()()); - break; - default: - TF_RETURN_IF_ERROR(BindTensor(&insert_tensor_, 4, t)); - break; + if (t.shape().dims() == 0 && t.dtype() == DT_INT64) { + insert_tensor_.BindInt(4, t.scalar()()); + } else if (t.shape().dims() == 0 && t.dtype() == DT_DOUBLE) { + insert_tensor_.BindDouble(4, t.scalar()()); + } else { + TF_RETURN_IF_ERROR(BindTensor(&insert_tensor_, 4, t)); } return insert_tensor_.StepAndReset(); } + Status WriteScalar(int64 global_step, Tensor t, const string& tag) override { + Tensor t2; + TF_RETURN_IF_ERROR(CoerceScalar(t, &t2)); + // TODO(jart): Generate scalars plugin metadata on this value. + return WriteTensor(global_step, std::move(t2), tag, ""); + } + Status WriteGraph(int64 global_step, std::unique_ptr g) override { mutex_lock ml(mu_); TF_RETURN_IF_ERROR(InitializeParents()); @@ -325,15 +377,6 @@ class SummaryDbWriter : public SummaryWriterInterface { } } - Status WriteScalar(int64 global_step, Tensor t, const string& tag) override { - // TODO(@jart): Unlike WriteTensor, this method would be granted leniency - // to change the dtype if it saves storage space. For example, - // DT_UINT32 would be stored in the database as an INTEGER - // rather than a serialized BLOB. But when reading it back, - // the dtype would become DT_INT64. 
- return errors::Unimplemented("WriteScalar"); - } - Status WriteHistogram(int64 global_step, Tensor t, const string& tag) override { return errors::Unimplemented( diff --git a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc index 3431842ca2..625861fa6b 100644 --- a/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc +++ b/tensorflow/contrib/tensorboard/db/summary_db_writer_test.cc @@ -290,5 +290,32 @@ TEST_F(SummaryDbWriterTest, WriteGraph) { EXPECT_EQ(1LL, QueryInt("SELECT is_control FROM NodeInputs WHERE idx = 2")); } +TEST_F(SummaryDbWriterTest, WriteScalarInt32_CoercesToInt64) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); + Tensor t(DT_INT32, {}); + t.scalar()() = -17; + TF_ASSERT_OK(writer_->WriteScalar(1, t, "t")); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(-17LL, QueryInt("SELECT tensor FROM Tensors")); +} + +TEST_F(SummaryDbWriterTest, WriteScalarInt8_CoercesToInt64) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); + Tensor t(DT_INT8, {}); + t.scalar()() = static_cast(-17); + TF_ASSERT_OK(writer_->WriteScalar(1, t, "t")); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(-17LL, QueryInt("SELECT tensor FROM Tensors")); +} + +TEST_F(SummaryDbWriterTest, WriteScalarUint8_CoercesToInt64) { + TF_ASSERT_OK(CreateSummaryDbWriter(db_, "", "", "", &env_, &writer_)); + Tensor t(DT_UINT8, {}); + t.scalar()() = static_cast(254); + TF_ASSERT_OK(writer_->WriteScalar(1, t, "t")); + TF_ASSERT_OK(writer_->Flush()); + ASSERT_EQ(254LL, QueryInt("SELECT tensor FROM Tensors")); +} + } // namespace } // namespace tensorflow -- GitLab From 2397a7f375ab91b071126948dcae7abd3e775d3f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 19:34:35 -0800 Subject: [PATCH 0043/1225] Update ops-related pbtxt files. 
PiperOrigin-RevId: 176059000 --- .../core/ops/compat/ops_history.v1.pbtxt | 100 +++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 105 ++++++++++++++++++ 2 files changed, 205 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index ffb608d600..daeb6763c8 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -6059,6 +6059,33 @@ op { type: "list(float)" } } +op { + name: "BytesProducedStatsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "tag" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "CTCBeamSearchDecoder" input_arg { @@ -15001,6 +15028,18 @@ op { } is_stateful: true } +op { + name: "IteratorSetStatsAggregator" + input_arg { + name: "iterator_handle" + type: DT_RESOURCE + } + input_arg { + name: "stats_aggregator_handle" + type: DT_RESOURCE + } + is_stateful: true +} op { name: "IteratorToStringHandle" input_arg { @@ -15192,6 +15231,33 @@ op { } } } +op { + name: "LatencyStatsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "tag" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "LearnedUnigramCandidateSampler" input_arg { @@ -38418,6 +38484,40 @@ op { } } } +op { + name: "StatsAggregatorHandle" + output_arg { + name: "handle" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" 
+ } + } + is_stateful: true +} +op { + name: "StatsAggregatorSummary" + input_arg { + name: "iterator" + type: DT_RESOURCE + } + output_arg { + name: "summary" + type: DT_STRING + } + is_stateful: true +} op { name: "StopGradient" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index d9e3dbdbb7..55a8fc9032 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -4270,6 +4270,34 @@ op { summary: "Bucketizes \'input\' based on \'boundaries\'." description: "For example, if the inputs are\n boundaries = [0, 10, 100]\n input = [[-5, 10000]\n [150, 10]\n [5, 100]]\n\nthen the output will be\n output = [[0, 3]\n [3, 2]\n [1, 3]]" } +op { + name: "BytesProducedStatsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "tag" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Records the bytes size of each element of `input_dataset` in a StatsAggregator." +} op { name: "CTCBeamSearchDecoder" input_arg { @@ -11502,6 +11530,19 @@ op { summary: "Gets the next output from the given iterator." is_stateful: true } +op { + name: "IteratorSetStatsAggregator" + input_arg { + name: "iterator_handle" + type: DT_RESOURCE + } + input_arg { + name: "stats_aggregator_handle" + type: DT_RESOURCE + } + summary: "Associates the given iterator with the given statistics aggregator." + is_stateful: true +} op { name: "IteratorToStringHandle" input_arg { @@ -11698,6 +11739,34 @@ op { } summary: "Gradients for Local Response Normalization." 
} +op { + name: "LatencyStatsDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "tag" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + summary: "Records the latency of producing `input_dataset` elements in a StatsAggregator." +} op { name: "LearnedUnigramCandidateSampler" input_arg { @@ -29980,6 +30049,42 @@ op { summary: "Outputs deterministic pseudorandom values from a truncated normal distribution." description: "The generated values follow a normal distribution with mean 0 and standard\ndeviation 1, except that values whose magnitude is more than 2 standard\ndeviations from the mean are dropped and re-picked.\n\nThe outputs are a deterministic function of `shape` and `seed`." } +op { + name: "StatsAggregatorHandle" + output_arg { + name: "handle" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + summary: "Creates a statistics manager resource." + is_stateful: true +} +op { + name: "StatsAggregatorSummary" + input_arg { + name: "iterator" + type: DT_RESOURCE + } + output_arg { + name: "summary" + type: DT_STRING + } + summary: "Produces a summary of any statistics recorded by the given statistics manager." + is_stateful: true +} op { name: "StopGradient" input_arg { -- GitLab From a764ec152ce8a4ebe6faf42c55a3177182389c9f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 19:45:12 -0800 Subject: [PATCH 0044/1225] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 176059622 --- tensorflow/go/op/wrappers.go | 102 +++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index a910b51fb9..e650d25a32 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -5334,6 +5334,21 @@ func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged return op.Output(0) } +// Produces a summary of any statistics recorded by the given statistics manager. +func StatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "StatsAggregatorSummary", + Input: []tf.Input{ + iterator, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // FIFOQueueV2Attr is an optional argument to FIFOQueueV2. type FIFOQueueV2Attr func(optionalAttr) @@ -5950,6 +5965,23 @@ func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf. return scope.AddOperation(opspec) } +// Records the latency of producing `input_dataset` elements in a StatsAggregator. +func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "LatencyStatsDataset", + Input: []tf.Input{ + input_dataset, tag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Concatenates tensors along one dimension. // // Arguments: @@ -6146,6 +6178,43 @@ func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_ou return op.Output(0) } +// StatsAggregatorHandleAttr is an optional argument to StatsAggregatorHandle. 
+type StatsAggregatorHandleAttr func(optionalAttr) + +// StatsAggregatorHandleContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func StatsAggregatorHandleContainer(value string) StatsAggregatorHandleAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// StatsAggregatorHandleSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func StatsAggregatorHandleSharedName(value string) StatsAggregatorHandleAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a statistics manager resource. +func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatsAggregatorHandle", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // CropAndResizeGradBoxesAttr is an optional argument to CropAndResizeGradBoxes. type CropAndResizeGradBoxesAttr func(optionalAttr) @@ -19067,6 +19136,22 @@ func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value return op.Output(0) } +// Associates the given iterator with the given statistics aggregator. +// +// Returns the created operation. +func IteratorSetStatsAggregator(scope *Scope, iterator_handle tf.Output, stats_aggregator_handle tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IteratorSetStatsAggregator", + Input: []tf.Input{ + iterator_handle, stats_aggregator_handle, + }, + } + return scope.AddOperation(opspec) +} + // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. 
type ResourceSparseApplyFtrlV2Attr func(optionalAttr) @@ -24785,6 +24870,23 @@ func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Ou return scope.AddOperation(opspec) } +// Records the bytes size of each element of `input_dataset` in a StatsAggregator. +func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "BytesProducedStatsDataset", + Input: []tf.Input{ + input_dataset, tag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // QrAttr is an optional argument to Qr. type QrAttr func(optionalAttr) -- GitLab From 10581c8afee392f2455acb700ece8217a3a19a4b Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Thu, 16 Nov 2017 20:50:28 -0800 Subject: [PATCH 0045/1225] Rename global_step -> step in contrib/summary API Since it's more succinct and the API doesn't actually care if the provided step is the one true global step. 
PiperOrigin-RevId: 176063779 --- tensorflow/contrib/summary/summary_ops.py | 72 ++++++++++--------- .../contrib/summary/summary_ops_test.py | 4 +- tensorflow/core/kernels/summary_kernels.cc | 40 +++++------ tensorflow/core/ops/summary_ops.cc | 24 +++---- 4 files changed, 73 insertions(+), 67 deletions(-) diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index bf810744a1..3e65f83051 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -344,10 +344,9 @@ def summary_writer_function(name, tensor, function, family=None): return op -def generic(name, tensor, metadata=None, family=None, global_step=None): +def generic(name, tensor, metadata=None, family=None, step=None): """Writes a tensor summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): if metadata is None: serialized_metadata = constant_op.constant("") @@ -358,12 +357,15 @@ def generic(name, tensor, metadata=None, family=None, global_step=None): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_summary( context.context().summary_writer_resource, - global_step, array_ops.identity(tensor), - tag, serialized_metadata, name=scope) + _choose_step(step), + array_ops.identity(tensor), + tag, + serialized_metadata, + name=scope) return summary_writer_function(name, tensor, function, family=family) -def scalar(name, tensor, family=None, global_step=None): +def scalar(name, tensor, family=None, step=None): """Writes a scalar summary if possible. Unlike @{tf.contrib.summary.generic} this op may change the dtype @@ -375,68 +377,68 @@ def scalar(name, tensor, family=None, global_step=None): `float32`, `float64`, `int32`, `int64`, `uint8`, `int16`, `int8`, `uint16`, `half`, `uint32`, `uint64`. family: Optional, the summary's family. 
- global_step: The `int64` monotonic step variable, which defaults + step: The `int64` monotonic step variable, which defaults to @{tf.train.get_global_step}. Returns: The created @{tf.Operation} or a @{tf.no_op} if summary writing has not been enabled for this context. """ - if global_step is None: - global_step = training_util.get_global_step() - else: - global_step = ops.convert_to_tensor(global_step, dtypes.int64) + def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_scalar_summary( context.context().summary_writer_resource, - global_step, tag, array_ops.identity(tensor), + _choose_step(step), + tag, + array_ops.identity(tensor), name=scope) + return summary_writer_function(name, tensor, function, family=family) -def histogram(name, tensor, family=None, global_step=None): +def histogram(name, tensor, family=None, step=None): """Writes a histogram summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_histogram_summary( context.context().summary_writer_resource, - global_step, tag, array_ops.identity(tensor), + _choose_step(step), + tag, + array_ops.identity(tensor), name=scope) return summary_writer_function(name, tensor, function, family=family) -def image(name, tensor, bad_color=None, max_images=3, family=None, - global_step=None): +def image(name, tensor, bad_color=None, max_images=3, family=None, step=None): """Writes an image summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): bad_color_ = (constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8) if bad_color is None else bad_color) # Note the identity to move the tensor to the CPU. 
return gen_summary_ops.write_image_summary( context.context().summary_writer_resource, - global_step, tag, array_ops.identity(tensor), + _choose_step(step), + tag, + array_ops.identity(tensor), bad_color_, - max_images, name=scope) + max_images, + name=scope) return summary_writer_function(name, tensor, function, family=family) -def audio(name, tensor, sample_rate, max_outputs, family=None, - global_step=None): +def audio(name, tensor, sample_rate, max_outputs, family=None, step=None): """Writes an audio summary if possible.""" - if global_step is None: - global_step = training_util.get_global_step() + def function(tag, scope): # Note the identity to move the tensor to the CPU. return gen_summary_ops.write_audio_summary( context.context().summary_writer_resource, - global_step, + _choose_step(step), tag, array_ops.identity(tensor), sample_rate=sample_rate, @@ -483,15 +485,13 @@ def graph(param, step=None, name=None): if writer is None: return control_flow_ops.no_op() with ops.device("cpu:0"): - if step is None: - step = training_util.get_global_step() - else: - step = ops.convert_to_tensor(step, dtypes.int64) if isinstance(param, (ops.Graph, graph_pb2.GraphDef)): tensor = ops.convert_to_tensor(_serialize_graph(param), dtypes.string) else: tensor = array_ops.identity(param) - return gen_summary_ops.write_graph_summary(writer, step, tensor, name=name) + return gen_summary_ops.write_graph_summary( + writer, _choose_step(step), tensor, name=name) + _graph = graph # for functions with a graph parameter @@ -527,3 +527,11 @@ def _serialize_graph(arbitrary_graph): return arbitrary_graph.as_graph_def(add_shapes=True).SerializeToString() else: return arbitrary_graph.SerializeToString() + + +def _choose_step(step): + if step is None: + return training_util.get_global_step() + if not isinstance(step, ops.Tensor): + return ops.convert_to_tensor(step, dtypes.int64) + return step diff --git a/tensorflow/contrib/summary/summary_ops_test.py 
b/tensorflow/contrib/summary/summary_ops_test.py index c5ca054f77..ad89c0c36a 100644 --- a/tensorflow/contrib/summary/summary_ops_test.py +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -97,13 +97,13 @@ class TargetTest(test_util.TensorFlowTestCase): self.assertEqual(events[1].summary.value[0].tag, 'scalar') def testSummaryGlobalStep(self): - global_step = training_util.get_or_create_global_step() + step = training_util.get_or_create_global_step() logdir = tempfile.mkdtemp() with summary_ops.create_summary_file_writer( logdir, max_queue=0, name='t2').as_default(), summary_ops.always_record_summaries(): - summary_ops.scalar('scalar', 2.0, global_step=global_step) + summary_ops.scalar('scalar', 2.0, step=step) events = summary_test_util.events_from_logdir(logdir) self.assertEqual(len(events), 2) diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc index 3706f51cf4..7487e70acc 100644 --- a/tensorflow/core/kernels/summary_kernels.cc +++ b/tensorflow/core/kernels/summary_kernels.cc @@ -111,8 +111,8 @@ class WriteSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); - const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &tmp)); + const int64 step = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); const string& tag = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("summary_metadata", &tmp)); @@ -121,8 +121,7 @@ class WriteSummaryOp : public OpKernel { const Tensor* t; OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); - OP_REQUIRES_OK(ctx, - s->WriteTensor(global_step, *t, tag, serialized_metadata)); + OP_REQUIRES_OK(ctx, s->WriteTensor(step, *t, tag, serialized_metadata)); } }; REGISTER_KERNEL_BUILDER(Name("WriteSummary").Device(DEVICE_CPU), @@ -158,15 +157,15 @@ class WriteScalarSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, 
LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); - const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &tmp)); + const int64 step = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); const string& tag = tmp->scalar()(); const Tensor* t; OP_REQUIRES_OK(ctx, ctx->input("value", &t)); - OP_REQUIRES_OK(ctx, s->WriteScalar(global_step, *t, tag)); + OP_REQUIRES_OK(ctx, s->WriteScalar(step, *t, tag)); } }; REGISTER_KERNEL_BUILDER(Name("WriteScalarSummary").Device(DEVICE_CPU), @@ -181,15 +180,15 @@ class WriteHistogramSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); - const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &tmp)); + const int64 step = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); const string& tag = tmp->scalar()(); const Tensor* t; OP_REQUIRES_OK(ctx, ctx->input("values", &t)); - OP_REQUIRES_OK(ctx, s->WriteHistogram(global_step, *t, tag)); + OP_REQUIRES_OK(ctx, s->WriteHistogram(step, *t, tag)); } }; REGISTER_KERNEL_BUILDER(Name("WriteHistogramSummary").Device(DEVICE_CPU), @@ -210,8 +209,8 @@ class WriteImageSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); - const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &tmp)); + const int64 step = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); const string& tag = tmp->scalar()(); const Tensor* bad_color; @@ -224,8 +223,7 @@ class WriteImageSummaryOp : public OpKernel { const Tensor* t; OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); - OP_REQUIRES_OK( - ctx, 
s->WriteImage(global_step, *t, tag, max_images_, *bad_color)); + OP_REQUIRES_OK(ctx, s->WriteImage(step, *t, tag, max_images_, *bad_color)); } private: @@ -247,8 +245,8 @@ class WriteAudioSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* tmp; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); - const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &tmp)); + const int64 step = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); const string& tag = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("sample_rate", &tmp)); @@ -257,8 +255,8 @@ class WriteAudioSummaryOp : public OpKernel { const Tensor* t; OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); - OP_REQUIRES_OK( - ctx, s->WriteAudio(global_step, *t, tag, max_outputs_, sample_rate)); + OP_REQUIRES_OK(ctx, + s->WriteAudio(step, *t, tag, max_outputs_, sample_rate)); } private: @@ -278,8 +276,8 @@ class WriteGraphSummaryOp : public OpKernel { OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); core::ScopedUnref unref(s); const Tensor* t; - OP_REQUIRES_OK(ctx, ctx->input("global_step", &t)); - const int64 global_step = t->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("step", &t)); + const int64 step = t->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); std::unique_ptr graph{new GraphDef}; if (!ParseProtoUnlimited(graph.get(), t->scalar()())) { @@ -287,7 +285,7 @@ class WriteGraphSummaryOp : public OpKernel { errors::DataLoss("Bad tf.GraphDef binary proto tensor string")); return; } - OP_REQUIRES_OK(ctx, s->WriteGraph(global_step, std::move(graph))); + OP_REQUIRES_OK(ctx, s->WriteGraph(step, std::move(graph))); } }; REGISTER_KERNEL_BUILDER(Name("WriteGraphSummary").Device(DEVICE_CPU), diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc index 7f6d8b06cd..029ff09906 100644 --- a/tensorflow/core/ops/summary_ops.cc +++ 
b/tensorflow/core/ops/summary_ops.cc @@ -99,7 +99,7 @@ writer: A handle to the summary writer resource. REGISTER_OP("WriteSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tensor: T") .Input("tag: string") .Input("summary_metadata: string") @@ -109,7 +109,7 @@ REGISTER_OP("WriteSummary") Outputs a `Summary` protocol buffer with a tensor. writer: A handle to a summary writer. -global_step: The step to write the summary for. +step: The step to write the summary for. tensor: A tensor to serialize. tag: The summary's tag. summary_metadata: Serialized SummaryMetadata protocol buffer containing @@ -132,7 +132,7 @@ event: A string containing a binary-encoded tf.Event proto. REGISTER_OP("WriteScalarSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tag: string") .Input("value: T") .Attr("T: realnumbertype") @@ -143,14 +143,14 @@ Writes a `Summary` protocol buffer with scalar values. The input `tag` and `value` must have the scalars. writer: A handle to a summary writer. -global_step: The step to write the summary for. +step: The step to write the summary for. tag: Tag for the summary. value: Value for the summary. )doc"); REGISTER_OP("WriteHistogramSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tag: string") .Input("values: T") .Attr("T: realnumbertype = DT_FLOAT") @@ -165,14 +165,14 @@ has one summary value containing a histogram for `values`. This op reports an `InvalidArgument` error if any value is not finite. writer: A handle to a summary writer. -global_step: The step to write the summary for. +step: The step to write the summary for. tag: Scalar. Tag to use for the `Summary.Value`. values: Any shape. Values to use to build the histogram. 
)doc"); REGISTER_OP("WriteImageSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tag: string") .Input("tensor: T") .Input("bad_color: uint8") @@ -217,7 +217,7 @@ replaced by this tensor in the output image. The default value is the color red. writer: A handle to a summary writer. -global_step: The step to write the summary for. +step: The step to write the summary for. tag: Scalar. Used to build the `tag` attribute of the summary values. tensor: 4-D of shape `[batch_size, height, width, channels]` where `channels` is 1, 3, or 4. @@ -227,7 +227,7 @@ bad_color: Color to use for pixels with non-finite values. REGISTER_OP("WriteAudioSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tag: string") .Input("tensor: float") .Input("sample_rate: float") @@ -249,7 +249,7 @@ build the `tag` of the summary values: generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. writer: A handle to a summary writer. -global_step: The step to write the summary for. +step: The step to write the summary for. tag: Scalar. Used to build the `tag` attribute of the summary values. tensor: 2-D of shape `[batch_size, frames]`. sample_rate: The sample rate of the signal in hertz. @@ -258,14 +258,14 @@ max_outputs: Max number of batch elements to generate audio for. REGISTER_OP("WriteGraphSummary") .Input("writer: resource") - .Input("global_step: int64") + .Input("step: int64") .Input("tensor: string") .SetShapeFn(shape_inference::NoOutputs) .Doc(R"doc( Writes a `GraphDef` protocol buffer to a `SummaryWriter`. writer: Handle of `SummaryWriter`. -global_step: The step to write the summary for. +step: The step to write the summary for. tensor: A scalar string of the serialized tf.GraphDef proto. 
)doc"); -- GitLab From 7a2a3b40d518baa0c9bc4231df434fa09857cee4 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Thu, 16 Nov 2017 21:04:50 -0800 Subject: [PATCH 0046/1225] [XLA] Rematerialization and fusion need to consider nested computations to determine if rematerializability or fusability. PiperOrigin-RevId: 176064783 --- .../compiler/xla/service/hlo_instruction.cc | 18 +++--------------- .../xla/service/hlo_rematerialization.cc | 9 +-------- 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index a0795a7b36..e3fdc53b7f 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -793,7 +793,7 @@ HloInstruction* HloInstruction::FuseInstructionInternal( HloInstruction* HloInstruction::CloneAndFuseInternal( HloInstruction* instruction_to_fuse, bool add_output) { CHECK_EQ(opcode_, HloOpcode::kFusion); - CHECK(instruction_to_fuse->IsFusable()); + CHECK(instruction_to_fuse->IsFusable()) << instruction_to_fuse->ToString(); VLOG(3) << "CloneAndFuseInternal:\n" << instruction_to_fuse->ToString(); HloInstruction* clone = nullptr; if (called_computations_.empty()) { @@ -2134,25 +2134,13 @@ bool HloInstruction::IsFusable() const { if (tracing()) { return false; } - // Some kinds of instructions don't make sense to fuse. switch (opcode_) { - case HloOpcode::kInfeed: - case HloOpcode::kOutfeed: case HloOpcode::kParameter: - case HloOpcode::kTrace: - case HloOpcode::kRecv: - case HloOpcode::kRecvDone: - case HloOpcode::kSend: - case HloOpcode::kSendDone: return false; - // Only fuse Rng if it is used once, otherwise the random numbers generated - // will be different in each fusion. If it is the root (user count = 0) - // then it is the equivalent of having one user. - case HloOpcode::kRng: - return users_.size() <= 1; + // Side effecting instructions cannot be fused.
default: - return true; + return !HasSideEffect(); } } diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 828be8490c..017f996bc4 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -62,18 +62,11 @@ bool IsRematerializable(const HloInstruction* instruction) { case HloOpcode::kConstant: case HloOpcode::kCrossReplicaSum: case HloOpcode::kCustomCall: - case HloOpcode::kOutfeed: - case HloOpcode::kInfeed: case HloOpcode::kParameter: - case HloOpcode::kRecv: - case HloOpcode::kRecvDone: - case HloOpcode::kSend: - case HloOpcode::kSendDone: - case HloOpcode::kTrace: case HloOpcode::kWhile: return false; default: - return true; + return !instruction->HasSideEffect(); } } -- GitLab From 389d4001261df5a0f0db1ed869e2c72fefb2297e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Nov 2017 21:13:31 -0800 Subject: [PATCH 0047/1225] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 176065246 --- tensorflow/go/op/wrappers.go | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index e650d25a32..1d1383ec82 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -44,19 +44,19 @@ func makeOutputList(op *tf.Operation, start int, output string) ([]tf.Output, in // // Arguments: // writer: A handle to a summary writer. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tag: Tag for the summary. // value: Value for the summary. // // Returns the created operation. 
-func WriteScalarSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) { +func WriteScalarSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ Type: "WriteScalarSummary", Input: []tf.Input{ - writer, global_step, tag, value, + writer, step, tag, value, }, } return scope.AddOperation(opspec) @@ -89,21 +89,21 @@ func ImportEvent(scope *Scope, writer tf.Output, event tf.Output) (o *tf.Operati // // Arguments: // writer: A handle to a summary writer. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tensor: A tensor to serialize. // tag: The summary's tag. // summary_metadata: Serialized SummaryMetadata protocol buffer containing // plugin-related metadata for this summary. // // Returns the created operation. -func WriteSummary(scope *Scope, writer tf.Output, global_step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) { +func WriteSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ Type: "WriteSummary", Input: []tf.Input{ - writer, global_step, tensor, tag, summary_metadata, + writer, step, tensor, tag, summary_metadata, }, } return scope.AddOperation(opspec) @@ -2147,19 +2147,19 @@ func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset // // Arguments: // writer: A handle to a summary writer. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tag: Scalar. Tag to use for the `Summary.Value`. // values: Any shape. Values to use to build the histogram. // // Returns the created operation. 
-func WriteHistogramSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) { +func WriteHistogramSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ Type: "WriteHistogramSummary", Input: []tf.Input{ - writer, global_step, tag, values, + writer, step, tag, values, }, } return scope.AddOperation(opspec) @@ -11101,13 +11101,13 @@ func WriteAudioSummaryMaxOutputs(value int64) WriteAudioSummaryAttr { // // Arguments: // writer: A handle to a summary writer. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tag: Scalar. Used to build the `tag` attribute of the summary values. // tensor: 2-D of shape `[batch_size, frames]`. // sample_rate: The sample rate of the signal in hertz. // // Returns the created operation. -func WriteAudioSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) { +func WriteAudioSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -11118,7 +11118,7 @@ func WriteAudioSummary(scope *Scope, writer tf.Output, global_step tf.Output, ta opspec := tf.OpSpec{ Type: "WriteAudioSummary", Input: []tf.Input{ - writer, global_step, tag, tensor, sample_rate, + writer, step, tag, tensor, sample_rate, }, Attrs: attrs, } @@ -18248,14 +18248,14 @@ func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr { // // Arguments: // writer: A handle to a summary writer. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tag: Scalar. Used to build the `tag` attribute of the summary values. 
// tensor: 4-D of shape `[batch_size, height, width, channels]` where // `channels` is 1, 3, or 4. // bad_color: Color to use for pixels with non-finite values. // // Returns the created operation. -func WriteImageSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { +func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -18266,7 +18266,7 @@ func WriteImageSummary(scope *Scope, writer tf.Output, global_step tf.Output, ta opspec := tf.OpSpec{ Type: "WriteImageSummary", Input: []tf.Input{ - writer, global_step, tag, tensor, bad_color, + writer, step, tag, tensor, bad_color, }, Attrs: attrs, } @@ -20657,18 +20657,18 @@ func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { // // Arguments: // writer: Handle of `SummaryWriter`. -// global_step: The step to write the summary for. +// step: The step to write the summary for. // tensor: A scalar string of the serialized tf.GraphDef proto. // // Returns the created operation. -func WriteGraphSummary(scope *Scope, writer tf.Output, global_step tf.Output, tensor tf.Output) (o *tf.Operation) { +func WriteGraphSummary(scope *Scope, writer tf.Output, step tf.Output, tensor tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ Type: "WriteGraphSummary", Input: []tf.Input{ - writer, global_step, tensor, + writer, step, tensor, }, } return scope.AddOperation(opspec) -- GitLab From 724ca9f1a5a7428e74b62c8e2e6061244af93ace Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 07:21:52 -0800 Subject: [PATCH 0048/1225] [XLA] Enable operand buffer aliasing for Call instructions where the unique use of operand in the called computation is the root instruction which is elementwise on the operand.
This eliminates copies of Call instruction result buffers induced by parallel computation outlining on the XLA:CPU backend. PiperOrigin-RevId: 176106140 --- .../compiler/xla/service/liveness_util.cc | 52 ++++++++++++++++++- .../xla/service/liveness_util_test.cc | 39 ++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/liveness_util.cc b/tensorflow/compiler/xla/service/liveness_util.cc index 53d88eda7a..68c99256a2 100644 --- a/tensorflow/compiler/xla/service/liveness_util.cc +++ b/tensorflow/compiler/xla/service/liveness_util.cc @@ -103,7 +103,7 @@ namespace { // Returns all uses of all aliases of 'instruction' at 'index' in 'uses'. // Each use in 'uses' is a pair (HloInstruction* user, int64 operand_index) -// where 'user' is a user of an alias of 'intruction' at 'index', and +// where 'user' is a user of an alias of 'instruction' at 'index', and // 'operand_index' is the operand index at which the alias appears in the // operand list of 'user'. std::vector> GetAllUsesOfInstructionAtIndex( @@ -243,6 +243,31 @@ bool CanShareOperandBufferWithUser( std::vector operand_indices = user->OperandIndices(operand); return operand_indices.size() == 1 && operand_indices[0] == 0; } + if (user->opcode() == HloOpcode::kCall) { + // TODO(b/62548313): Remove when buffer assignment is module scoped and + // does not assign buffers to calls. + // Find called computation parameter associated with 'operand'. + const std::vector operand_indices = user->OperandIndices(operand); + if (operand_indices.size() > 1) { + return false; + } + CHECK_EQ(1, operand_indices.size()); + auto* param = user->to_apply()->parameter_instruction(operand_indices[0]); + // Get all uses of 'operand' at 'index' in called computation. + auto param_uses = GetAllUsesOfInstructionAtIndex(param, operand_index, + points_to_analysis); + + // Return true iff: + // *) There exists exactly one use of 'operand' in called computation. 
+ // *) The unique use is by the root instruction of called computation. + // (Note: we check the root of the called computation, because the + // root result buffer is required to alias with the Call result buffer). + // *) The root instruction of the called computation is element-wise on + // 'operand'. + auto* callee_root = user->to_apply()->root_instruction(); + return param_uses.size() == 1 && param_uses[0].first == callee_root && + callee_root->IsElementwiseOnOperand(param_uses[0].second); + } // Check if 'user' is element-wise. return user->IsElementwise(); } @@ -322,6 +347,31 @@ bool CanShareOperandBufferWithUser(HloInstruction* operand, std::vector operand_indices = user->OperandIndices(operand); return operand_indices.size() == 1 && operand_indices[0] == 0; } + if (user->opcode() == HloOpcode::kCall) { + // Get all uses of value defined by 'operand' at 'operand_index'. + const auto& uses = + dataflow.GetValueDefinedAt(operand, operand_index).uses(); + // Return true iff: + // *) There exists two uses of 'operand'. + // *) One use is by 'user' (caller). + // *) One use is by root instruction of called computation (callee root). + // (Note: we check the root of the called computation, because the + // root result buffer is required to alias with the Call result buffer). + // *) The root instruction of the called computation is element-wise on + // 'operand'. + const bool found_caller_use = + std::find_if(uses.begin(), uses.end(), [user](const HloUse& use) { + return use.instruction == user; + }) != uses.end(); + auto* callee_root = user->to_apply()->root_instruction(); + const bool found_elementwise_callee_use = + std::find_if( + uses.begin(), uses.end(), [callee_root](const HloUse& use) { + return use.instruction == callee_root && + callee_root->IsElementwiseOnOperand(use.operand_number); + }) != uses.end(); + return uses.size() == 2 && found_caller_use && found_elementwise_callee_use; + } // Check if 'user' is element-wise. 
return user->IsElementwise(); } diff --git a/tensorflow/compiler/xla/service/liveness_util_test.cc b/tensorflow/compiler/xla/service/liveness_util_test.cc index b5e15906d3..476e86fa72 100644 --- a/tensorflow/compiler/xla/service/liveness_util_test.cc +++ b/tensorflow/compiler/xla/service/liveness_util_test.cc @@ -415,5 +415,44 @@ TEST_F(CanShareOperandBufferWithUserTest, WhileCanShare) { CanShareOperandBufferWithUser(data, {}, whil, {}, *dataflow_analysis_)); } +// Tests that Call can alias operand buffer if the only use of the operand +// in the called computation is an elementwise instruction. +TEST_F(CanShareOperandBufferWithUserTest, CallToComputationWithFusionRoot) { + Shape shape = ShapeUtil::MakeShape(F32, {8}); + // Build sub-computation with fusion root. + auto sub_builder = HloComputation::Builder(TestName() + "_sub"); + auto sub_param = sub_builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "sub_param")); + auto one = sub_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + auto ones = sub_builder.AddInstruction( + HloInstruction::CreateBroadcast(shape, one, {1})); + auto add = sub_builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, sub_param, ones)); + + module_ = CreateNewModule(); + auto sub_computation = module_->AddEmbeddedComputation(sub_builder.Build()); + sub_computation->CreateFusionInstruction({add, ones}, + HloInstruction::FusionKind::kLoop); + + // Build entry-computation with kCall which calls 'sub_computation'. 
+ auto builder = HloComputation::Builder(TestName()); + + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param")); + auto reverse = + builder.AddInstruction(HloInstruction::CreateReverse(shape, param, {0})); + auto call = builder.AddInstruction( + HloInstruction::CreateCall(shape, {reverse}, sub_computation)); + computation_ = module_->AddEntryComputation(builder.Build()); + + RunAnalysis(); + + EXPECT_TRUE(CanShareOperandBufferWithUser(reverse, {}, call, {}, + *points_to_analysis_)); + EXPECT_TRUE(CanShareOperandBufferWithUser(reverse, {}, call, {}, + *dataflow_analysis_)); +} + } // namespace } // namespace xla -- GitLab From 881f84796f2559c0e7fd8081d7449a214a4cf7ac Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 07:36:46 -0800 Subject: [PATCH 0049/1225] internal change PiperOrigin-RevId: 176107131 --- tensorflow/contrib/lite/toco/format_port.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/format_port.h b/tensorflow/contrib/lite/toco/format_port.h index 3bc3295d04..0e999001e0 100644 --- a/tensorflow/contrib/lite/toco/format_port.h +++ b/tensorflow/contrib/lite/toco/format_port.h @@ -36,7 +36,7 @@ inline const char* IdentityOrConvertStringToRaw(const std::string& foo) { return foo.c_str(); } -#if defined(PLATFORM_GOOGLE) +#if defined(PLATFORM_GOOGLE) && defined(HAS_GLOBAL_STRING) // Overloaded case where we return string. inline const char* IdentityOrConvertStringToRaw(const string& foo) { return foo.c_str(); -- GitLab From 573a652ec5512a35d84d5b4b4400d7430baa854a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 09:26:32 -0800 Subject: [PATCH 0050/1225] Add Speech ASR Language Model test. 
PiperOrigin-RevId: 176117985 --- .../lite/models/speech_terse_lm_model_test.cc | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc diff --git a/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc b/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc new file mode 100644 index 0000000000..04c54ffb22 --- /dev/null +++ b/tensorflow/contrib/lite/models/speech_terse_lm_model_test.cc @@ -0,0 +1,122 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// Unit test for speech ASR LM model using TFLite Ops. 
+ +#include + +#include +#include + +#include "base/logging.h" +#include "file/base/path.h" +#include "testing/base/public/googletest.h" +#include +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/models/test_utils.h" + +namespace tflite { +namespace models { + +constexpr int kModelInput1Tensor = 0; +constexpr int kModelInput2Tensor = 66; +constexpr int kLstmLayer1OutputStateTensor = 21; +constexpr int kLstmLayer1CellStateTensor = 22; +constexpr int kLstmLayer2OutputStateTensor = 42; +constexpr int kLstmLayer2CellStateTensor = 43; +constexpr int kLstmLayer3OutputStateTensor = 63; +constexpr int kLstmLayer3CellStateTensor = 64; +constexpr int kModelOutputTensor = 75; + +static void ClearLstmStates(Interpreter* interpreter) { + memset(interpreter->tensor(kLstmLayer1OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer1OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer1CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer1CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer2OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer2OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer2CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer2CellStateTensor)->bytes); + + memset(interpreter->tensor(kLstmLayer3OutputStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer3OutputStateTensor)->bytes); + memset(interpreter->tensor(kLstmLayer3CellStateTensor)->data.raw, 0, + interpreter->tensor(kLstmLayer3CellStateTensor)->bytes); +} + +TEST(SpeechTerseLm, EndToEndTest) { + // Read the model. 
+ string tflite_file_path = + file::JoinPath(TestDataPath(), "speech_terse_lm_model.tflite"); + auto model = FlatBufferModel::BuildFromFile(tflite_file_path.c_str()); + CHECK(model) << "Failed to mmap model " << tflite_file_path; + + // Initialize the interpreter. + ops::builtin::BuiltinOpResolver builtins; + std::unique_ptr interpreter; + InterpreterBuilder(*model, builtins)(&interpreter); + CHECK(interpreter != nullptr); + interpreter->AllocateTensors(); + + // Load the input frames. + Frames input_frames; + const string input_file_path = + file::JoinPath(TestDataPath(), "speech_terse_lm_model_in.csv"); + ReadFrames(input_file_path, &input_frames); + + // Load the golden output results. + Frames output_frames; + const string output_file_path = + file::JoinPath(TestDataPath(), "speech_terse_lm_model_out.csv"); + ReadFrames(output_file_path, &output_frames); + + CHECK_EQ(interpreter->tensor(kModelInput1Tensor)->dims->size, 1); + const int input1_size = + interpreter->tensor(kModelInput1Tensor)->dims->data[0]; + CHECK_EQ(input1_size, 1); + CHECK_EQ(interpreter->tensor(kModelInput2Tensor)->dims->size, 1); + const int output_size = + interpreter->tensor(kModelOutputTensor)->dims->data[0]; + CHECK_EQ(output_size, 1); + + int* input_lookup_ptr = interpreter->tensor(kModelInput1Tensor)->data.i32; + int* output_lookup_ptr = interpreter->tensor(kModelInput2Tensor)->data.i32; + float* output_ptr = interpreter->tensor(kModelOutputTensor)->data.f; + + + for (int i = 0; i < input_frames.size(); i++) { + float output_score = 0.0f; + // Reset LSTM states for each sequence. + ClearLstmStates(interpreter.get()); + // For subsequent inputs feed them sequentially, one-by-one. + for (int k = 1; k < input_frames[i].size(); k++) { + // Feed the inputs to model. + input_lookup_ptr[0] = static_cast(input_frames[i][k - 1]); + output_lookup_ptr[0] = static_cast(input_frames[i][k]); + // Run the model. + interpreter->Invoke(); + // Sum up the outputs. 
+ output_score += output_ptr[0]; + } + // Validate the output. + ASSERT_NEAR(output_score, output_frames[i][0], 1.4e-5); + } +} + +} // namespace models +} // namespace tflite -- GitLab From be4295e796437d18ffb7242942c963a8857e5003 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 17 Nov 2017 10:10:37 -0800 Subject: [PATCH 0051/1225] Created new shared IsFreeOfSideEffect and ModifiedFrameInfo functions. PiperOrigin-RevId: 176124088 --- tensorflow/core/grappler/BUILD | 2 ++ tensorflow/core/grappler/op_types.cc | 28 +++++++++++++++++++ tensorflow/core/grappler/op_types.h | 3 ++ .../optimizers/arithmetic_optimizer.cc | 26 ++++------------- .../optimizers/arithmetic_optimizer.h | 8 ++---- .../optimizers/dependency_optimizer.cc | 9 ++++-- 6 files changed, 48 insertions(+), 28 deletions(-) diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD index 7b18e79c8d..c81c6c0f21 100644 --- a/tensorflow/core/grappler/BUILD +++ b/tensorflow/core/grappler/BUILD @@ -21,6 +21,8 @@ cc_library( hdrs = ["op_types.h"], visibility = ["//visibility:public"], deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", ], ) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index acb8498142..69bdef33c6 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/lib/core/status.h" namespace tensorflow { namespace grappler { @@ -120,5 +122,31 @@ bool IsVariable(const NodeDef& node) { op == "VarHandleOp" || op == "ReadVariableOp"; } +bool IsFreeOfSideEffect(const NodeDef& node) { + // Placeholders must be preserved to keep the graph feedable. 
+ if (IsPlaceholder(node)) { + return false; + } + const OpDef* op_def = nullptr; + Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); + if (!status.ok()) { + return false; + } + if (op_def->is_stateful()) { + return false; + } + // Nodes such as Assign or AssignAdd modify one of their inputs. + for (const auto& input : op_def->input_arg()) { + if (input.is_ref()) { + return false; + } + } + return true; +} + +bool ModifiesFrameInfo(const NodeDef& node) { + return IsEnter(node) || IsExit(node) || IsNextIteration(node); +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 0de954fcb4..a7c556c1ed 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -42,6 +42,9 @@ bool IsSwitch(const NodeDef& node); bool IsTranspose(const NodeDef& node); bool IsVariable(const NodeDef& node); +bool IsFreeOfSideEffect(const NodeDef& node); +bool ModifiesFrameInfo(const NodeDef& node); + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 0cd0d4351e..2677888fcb 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -512,33 +512,17 @@ bool UniqueNodes::SameNode(const NodeDef& node1, const NodeDef& node2) const { return true; } -// static -bool ArithmeticOptimizer::CanDedup( - const NodeDef& node, const std::unordered_set& nodes_to_preserve) { - if (nodes_to_preserve.find(node.name()) != nodes_to_preserve.end()) { +bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const { + if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { return false; } - if (IsEnter(node) || IsExit(node) || IsPlaceholder(node)) { + if (IsEnter(node) || IsExit(node)) { return false; } if 
(node.device().find("SPU") != string::npos) { return false; } - const OpDef* op_def = nullptr; - Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); - if (!status.ok()) { - return false; - } - if (op_def->is_stateful()) { - return false; - } - // Don't consolidate ops such as AssignAdd - for (const auto& input : op_def->input_arg()) { - if (input.is_ref()) { - return false; - } - } - return true; + return IsFreeOfSideEffect(node); } void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const { @@ -553,7 +537,7 @@ void ArithmeticOptimizer::DedupComputations(GraphDef* optimized_graph) const { continue; } NodeDef* node = optimized_graph->mutable_node(i); - if (!CanDedup(*node, nodes_to_preserve_)) { + if (!CanDedup(*node)) { continue; } NodeDef* rep = nodes.FindOrAddRepresentative(node); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index c8cc292295..c22e2d5363 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -28,11 +28,6 @@ namespace grappler { // run a model. class ArithmeticOptimizer : public GraphOptimizer { public: - // Returns true if it is safe to dedup node from the graph. - // TODO(rmlarsen): Refactor to op_types.{h,cc}. - static bool CanDedup(const NodeDef& node, - const std::unordered_set& nodes_to_preserve); - ArithmeticOptimizer() : opt_level_(RewriterConfig::ON) {} explicit ArithmeticOptimizer(RewriterConfig::Toggle opt_level) : opt_level_(opt_level) {} @@ -47,6 +42,9 @@ class ArithmeticOptimizer : public GraphOptimizer { const GraphDef& optimized_graph, double result) override; private: + // Returns true if it is safe to dedup node from the graph. + bool CanDedup(const NodeDef& node) const; + void DedupComputations(GraphDef* optimized_graph) const; // Runs peep-hole optimizations on `optimized_graph`, e.g., removing inverse // transposes. 
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index 49eb29d037..57eee60646 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -93,11 +93,16 @@ bool DependencyOptimizer::SafeToConvertToNoOp(const NodeDef& node) { if (!has_fetch_ || HasRegularOutputs(node, *node_map_)) { return false; } - + if (nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { + return false; + } if (IsMerge(node)) { return false; } - if (!ArithmeticOptimizer::CanDedup(node, nodes_to_preserve_)) { + if (ModifiesFrameInfo(node)) { + return false; + } + if (!IsFreeOfSideEffect(node)) { return false; } -- GitLab From 34d4986e661b8d45f7cec2a717c401c65f0a242f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 10:28:28 -0800 Subject: [PATCH 0052/1225] Fix the numbering of the LSTM layers in the figure. PiperOrigin-RevId: 176126886 --- tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg b/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg index ca96556422..9f841c219b 100644 --- a/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg +++ b/tensorflow/contrib/lite/models/testdata/g3doc/asr_am.svg @@ -1,4 +1,4 @@ - + -- GitLab From 6fecbc39f37643f30ebd0681240b2c2fdede5b09 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 10:32:07 -0800 Subject: [PATCH 0053/1225] Added tests for tensorflow::StringPiece::Hasher. 
PiperOrigin-RevId: 176127449 --- tensorflow/core/lib/core/stringpiece_test.cc | 72 ++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tensorflow/core/lib/core/stringpiece_test.cc b/tensorflow/core/lib/core/stringpiece_test.cc index ad70d41873..11554554e8 100644 --- a/tensorflow/core/lib/core/stringpiece_test.cc +++ b/tensorflow/core/lib/core/stringpiece_test.cc @@ -14,6 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/lib/core/stringpiece.h" + +#include #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -63,4 +65,74 @@ TEST(StringPiece, Contains) { EXPECT_TRUE(!a.contains(d)); } +TEST(StringPieceHasher, Equality) { + StringPiece::Hasher hasher; + + StringPiece s1("foo"); + StringPiece s2("bar"); + StringPiece s3("baz"); + StringPiece s4("zot"); + + EXPECT_TRUE(hasher(s1) != hasher(s2)); + EXPECT_TRUE(hasher(s1) != hasher(s3)); + EXPECT_TRUE(hasher(s1) != hasher(s4)); + EXPECT_TRUE(hasher(s2) != hasher(s3)); + EXPECT_TRUE(hasher(s2) != hasher(s4)); + EXPECT_TRUE(hasher(s3) != hasher(s4)); + + EXPECT_TRUE(hasher(s1) == hasher(s1)); + EXPECT_TRUE(hasher(s2) == hasher(s2)); + EXPECT_TRUE(hasher(s3) == hasher(s3)); + EXPECT_TRUE(hasher(s4) == hasher(s4)); +} + +TEST(StringPieceHasher, HashMap) { + string s1("foo"); + string s2("bar"); + string s3("baz"); + + StringPiece p1(s1); + StringPiece p2(s2); + StringPiece p3(s3); + + std::unordered_map map; + + map.insert(std::make_pair(p1, 0)); + map.insert(std::make_pair(p2, 1)); + map.insert(std::make_pair(p3, 2)); + EXPECT_EQ(map.size(), 3); + + bool found[3] = {false, false, false}; + for (auto const& val : map) { + int x = val.second; + EXPECT_TRUE(x >= 0 && x < 3); + EXPECT_TRUE(!found[x]); + found[x] = true; + } + EXPECT_EQ(found[0], true); + EXPECT_EQ(found[1], true); + EXPECT_EQ(found[2], true); + + auto new_iter = map.find("zot"); + EXPECT_TRUE(new_iter == map.end()); + + new_iter = 
map.find("bar"); + EXPECT_TRUE(new_iter != map.end()); + + map.erase(new_iter); + EXPECT_EQ(map.size(), 2); + + found[0] = false; + found[1] = false; + found[2] = false; + for (const auto& iter : map) { + int x = iter.second; + EXPECT_TRUE(x >= 0 && x < 3); + EXPECT_TRUE(!found[x]); + found[x] = true; + } + EXPECT_EQ(found[0], true); + EXPECT_EQ(found[1], false); + EXPECT_EQ(found[2], true); +} } // namespace tensorflow -- GitLab From fedb844013194539e23cb971df793b4029396c2f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 10:44:57 -0800 Subject: [PATCH 0054/1225] Throw error if context exists but graph is empty when enabling eager mode. PiperOrigin-RevId: 176129497 --- tensorflow/python/framework/ops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 6ac3b862c8..7cca260d73 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -4703,6 +4703,9 @@ def enable_eager_execution(config=None, device_policy=None): " policy: %s." % (config, context._context._config, device_policy, context._context._device_policy)) + else: + raise ValueError( + "tfe.enable_eager_execution has to be called at program startup.") def eager_run(main=None, argv=None): -- GitLab From de1cd503235a32ec216533d198dd6f6318655ab2 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 17 Nov 2017 10:58:28 -0800 Subject: [PATCH 0055/1225] Minor tf_session.i refactoring. Moves inline C++ helper functions to top of file, and adds CreateWrappedTFOutput and CreateWrappedTFOperation helper functions (this is pulling out existing functionality that will be useful moving forward). 
PiperOrigin-RevId: 176131555 --- tensorflow/python/client/tf_session.i | 82 +++++++++++++++------------ 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 40731aba7d..41c707ae63 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -24,6 +24,49 @@ limitations under the License. #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/public/version.h" +// Helper function to convert a Python list of Tensors to a C++ vector of +// TF_Outputs. +// +// Returns true if successful. Otherwise, returns false and sets error_msg. +bool PyTensorListToVector(PyObject* py_tensor_list, + std::vector* vec, + string* error_msg) { + if (!PyList_Check(py_tensor_list)) { + *error_msg = "expected Python list."; + return false; + } + size_t size = PyList_Size(py_tensor_list); + for (int i = 0; i < size; ++i) { + PyObject* item = PyList_GetItem(py_tensor_list, i); + TF_Output* input_ptr; + if (!SWIG_IsOK(SWIG_ConvertPtr(item, reinterpret_cast(&input_ptr), + SWIGTYPE_p_TF_Output, 0))) { + *error_msg = "expected Python list of wrapped TF_Output objects. " + "Found python list of something else."; + return false; + } + vec->push_back(*input_ptr); + } + return true; +} + +// Helper function to convert a TF_Output to a wrapped TF_Output Python object. +PyObject* CreateWrappedTFOutput(TF_Output tf_output) { + // We used heap-allocated pointers in the Python runtime (this is what SWIG + // generates by default for functions returning TF_Output). + TF_Output* tf_output_ptr = new TF_Output(tf_output); + // Use SWIG_POINTER_OWN so the TF_Output* is deleted by Python. + return SWIG_NewPointerObj(tf_output_ptr, SWIGTYPE_p_TF_Output, + SWIG_POINTER_OWN); +} + +// Helper function to convert a TF_Operation to a wrapped TF_Operation Python +// object. 
+PyObject* CreateWrappedTFOperation(TF_Operation* tf_operation) { + // No flags since operation is owned by TF_Graph. + return SWIG_NewPointerObj(tf_operation, SWIGTYPE_p_TF_Operation, 0); +} + %} %include "tensorflow/python/client/tf_sessionrun_wrapper.i" @@ -98,8 +141,7 @@ tensorflow::ImportNumpy(); } for (size_t i = 0; i < $1.size(); ++i) { - PyList_SET_ITEM($result, i, SWIG_NewPointerObj( - $1[i], SWIGTYPE_p_TF_Operation, 0)); + PyList_SET_ITEM($result, i, CreateWrappedTFOperation($1[i])); } } @@ -118,13 +160,7 @@ tensorflow::ImportNumpy(); // Unwrap the generated SwigValueWrapper> via & std::vector* tf_outputs = &$1; for (size_t i = 0; i < $1.size(); ++i) { - // We used wrapped heap-allocated pointers in the Python runtime (this is - // what SWIG generates by default for functions returning TF_Output). - TF_Output* tf_output_ptr = new TF_Output((*tf_outputs)[i]); - // Use SWIG_POINTER_OWN so the TF_Output* is deleted by Python. - PyList_SET_ITEM($result, i, - SWIG_NewPointerObj(tf_output_ptr, SWIGTYPE_p_TF_Output, - SWIG_POINTER_OWN)); + PyList_SET_ITEM($result, i, CreateWrappedTFOutput((*tf_outputs)[i])); } } @@ -268,34 +304,6 @@ tensorflow::ImportNumpy(); reinterpret_cast($1.data), $1.length); } -%inline %{ -// Helper function to convert a Python list of Tensors to a C++ vector of -// TF_Outputs. -// -// Returns true if successful. Otherwise, returns false and sets error_msg. -bool PyTensorListToVector(PyObject* py_tensor_list, - std::vector* vec, - string* error_msg) { - if (!PyList_Check(py_tensor_list)) { - *error_msg = "expected Python list."; - return false; - } - size_t size = PyList_Size(py_tensor_list); - for (int i = 0; i < size; ++i) { - PyObject* item = PyList_GetItem(py_tensor_list, i); - TF_Output* input_ptr; - if (!SWIG_IsOK(SWIG_ConvertPtr(item, reinterpret_cast(&input_ptr), - SWIGTYPE_p_TF_Output, 0))) { - *error_msg = "expected Python list of wrapped TF_Output objects. 
" - "Found python list of something else."; - return false; - } - vec->push_back(*input_ptr); - } - return true; -} -%} - // Converts input Python list of wrapped TF_Outputs into a single array %typemap(in) (const TF_Output* inputs, int num_inputs) (std::vector inputs) { -- GitLab From 684c02d91116022bbceea13fc4a0cff9267d8534 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 11:14:12 -0800 Subject: [PATCH 0056/1225] Add g3doc for the ASR LM model. PiperOrigin-RevId: 176134219 --- .../contrib/lite/models/testdata/g3doc/README.md | 13 +++++++++++++ .../contrib/lite/models/testdata/g3doc/asr_lm.svg | 4 ++++ 2 files changed, 17 insertions(+) create mode 100644 tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/README.md b/tensorflow/contrib/lite/models/testdata/g3doc/README.md index d0c21d2833..da4802b07d 100644 --- a/tensorflow/contrib/lite/models/testdata/g3doc/README.md +++ b/tensorflow/contrib/lite/models/testdata/g3doc/README.md @@ -61,6 +61,19 @@ the corresponding parameters as shown in the figure. ![asr_am_model](asr_am.svg "ASR AM model") +### Automatic Speech Recognizer (ASR) Language Model (LM) + +The language model for automatic speech recognition is the neural network model +for predicting the probability of a word given previous words in a sentence. +It generates posterior probabilities of the next word based on a sequence of +words. The words are encoded as indices in a fixed-size dictionary. +The model has two inputs, both of size one (integer), and an output of size one +(float). It consists of three embedding layers, three LSTM layers, followed by a +multiplication, a fully connected layer and an addition. +The corresponding parameters are shown in the figure.
+ +![asr_lm_model](asr_lm.svg "ASR LM model") + ## Speech models test input/output generation As mentioned above the input to models are generated from a pre-processing diff --git a/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg b/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg new file mode 100644 index 0000000000..84d5f95b6a --- /dev/null +++ b/tensorflow/contrib/lite/models/testdata/g3doc/asr_lm.svg @@ -0,0 +1,4 @@ + + + + -- GitLab From b8bef6e6c89931768ac1f6b28d834d359e761410 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 17 Nov 2017 11:26:54 -0800 Subject: [PATCH 0057/1225] Rename optimal to optimal_seconds in HloCostAnalysis etc. PiperOrigin-RevId: 176136105 --- .../compiler/xla/service/hlo_cost_analysis.cc | 20 +++++++++---------- .../compiler/xla/service/hlo_cost_analysis.h | 6 +++--- .../xla/service/hlo_cost_analysis_test.cc | 2 +- .../xla/service/hlo_execution_profile.cc | 2 +- .../xla/service/hlo_profile_printer.cc | 2 +- .../xla/service/hlo_profile_printer.h | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 1877065f67..a24457edbf 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -28,7 +28,7 @@ namespace xla { constexpr char HloCostAnalysis::kFlopsKey[]; constexpr char HloCostAnalysis::kTranscendentalsKey[]; constexpr char HloCostAnalysis::kBytesAccessedKey[]; -constexpr char HloCostAnalysis::kSecondsKey[]; +constexpr char HloCostAnalysis::kOptimalSecondsKey[]; HloCostAnalysis::HloCostAnalysis(const ShapeSizeFunction& shape_size) : HloCostAnalysis(shape_size, {}) {} @@ -60,16 +60,16 @@ Status HloCostAnalysis::Postprocess(const HloInstruction* hlo) { if (current_should_compute_bottleneck_time_) { // Compute the time as the time of the bottleneck, i.e. the slowest property // given the per-second rate of each property. 
- float max_seconds = 0.0f; + float optimal_seconds = 0.0f; for (const auto& property : current_properties_) { - if (property.first != kSecondsKey) { - max_seconds = std::max( - max_seconds, + if (property.first != kOptimalSecondsKey) { + optimal_seconds = std::max( + optimal_seconds, property.second / GetProperty(property.first, per_second_rates_, INFINITY)); } } - current_properties_[kSecondsKey] = max_seconds; + current_properties_[kOptimalSecondsKey] = optimal_seconds; } TF_RET_CHECK(hlo_properties_.emplace(hlo, current_properties_).second); @@ -496,8 +496,8 @@ float HloCostAnalysis::bytes_accessed() const { return GetProperty(kBytesAccessedKey, properties_sum_); } -float HloCostAnalysis::seconds() const { - return GetProperty(kSecondsKey, properties_sum_); +float HloCostAnalysis::optimal_seconds() const { + return GetProperty(kOptimalSecondsKey, properties_sum_); } int64 HloCostAnalysis::flop_count(const HloInstruction& hlo) const { @@ -512,8 +512,8 @@ int64 HloCostAnalysis::bytes_accessed(const HloInstruction& hlo) const { return GetPropertyForHlo(hlo, kBytesAccessedKey, hlo_properties_); } -float HloCostAnalysis::seconds(const HloInstruction& hlo) const { - return GetPropertyForHlo(hlo, kSecondsKey, hlo_properties_); +float HloCostAnalysis::optimal_seconds(const HloInstruction& hlo) const { + return GetPropertyForHlo(hlo, kOptimalSecondsKey, hlo_properties_); } StatusOr HloCostAnalysis::ProcessSubcomputation( diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index 0f44775378..e785596c8e 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -42,7 +42,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor { static constexpr char kFlopsKey[] = "flops"; static constexpr char kTranscendentalsKey[] = "transcendentals"; static constexpr char kBytesAccessedKey[] = "bytes accessed"; - static constexpr char kSecondsKey[] = 
"seconds"; + static constexpr char kOptimalSecondsKey[] = "optimal_seconds"; // shape_size is a function which returns the size in bytes of the top-level // buffer of a shape. @@ -118,14 +118,14 @@ class HloCostAnalysis : public ConstDfsHloVisitor { float flop_count() const; float transcendental_count() const; float bytes_accessed() const; - float seconds() const; + float optimal_seconds() const; // Returns the respective cost computed for a particular HLO instruction, or 0 // if the HLO was not found to have a cost in the analysis. int64 flop_count(const HloInstruction& hlo) const; int64 transcendental_count(const HloInstruction& hlo) const; int64 bytes_accessed(const HloInstruction& hlo) const; - float seconds(const HloInstruction& hlo) const; + float optimal_seconds(const HloInstruction& hlo) const; const Properties& properties() const { return properties_sum_; } const float property(const string& key) const { diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc index 0eaa21ef25..3b289c240a 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc @@ -389,7 +389,7 @@ TEST_F(FusionCostAnalysis, LoopFusion) { static_assert(bytes_accessed == 64, ""); EXPECT_EQ(fusion_analysis.bytes_accessed(), bytes_accessed); - EXPECT_EQ(fusion_analysis.seconds(), 1 << i); + EXPECT_EQ(fusion_analysis.optimal_seconds(), 1 << i); } } diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc index 755374b91d..9e256b9b37 100644 --- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc @@ -83,7 +83,7 @@ static HloProfilePrinter CreateOwnedHloProfilePrinter( instruction_info->transcendental_count = cost_analysis.transcendental_count(*hlo); instruction_info->bytes_accessed = 
cost_analysis.bytes_accessed(*hlo); - instruction_info->seconds = cost_analysis.seconds(*hlo); + instruction_info->optimal_seconds = cost_analysis.optimal_seconds(*hlo); instruction_info->profile_index = hlo_profile_index_map.GetProfileIndexFor(*hlo); CHECK_LT(instruction_info->profile_index, max_profile_index); diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.cc b/tensorflow/compiler/xla/service/hlo_profile_printer.cc index 071c5a6629..e944ad1513 100644 --- a/tensorflow/compiler/xla/service/hlo_profile_printer.cc +++ b/tensorflow/compiler/xla/service/hlo_profile_printer.cc @@ -50,7 +50,7 @@ string HloProfilePrinter::ToString(const int64* counters, /*short_name=*/instruction->short_name, instruction->category, counters[instruction->profile_index], instruction->flop_count, instruction->transcendental_count, instruction->bytes_accessed, - instruction->seconds); + instruction->optimal_seconds); } result += builder.ToString(); diff --git a/tensorflow/compiler/xla/service/hlo_profile_printer.h b/tensorflow/compiler/xla/service/hlo_profile_printer.h index 45921c66f6..316753a82a 100644 --- a/tensorflow/compiler/xla/service/hlo_profile_printer.h +++ b/tensorflow/compiler/xla/service/hlo_profile_printer.h @@ -41,7 +41,7 @@ class HloProfilePrinter { float flop_count; float transcendental_count; float bytes_accessed; - float seconds; + float optimal_seconds; // The index into the profile counters array for the HloInstruction // corresponding to this HloInstructionInfo. -- GitLab From 9dd9246d76aeada08f07d8c9550d7eedb0809713 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 17 Nov 2017 12:08:27 -0800 Subject: [PATCH 0058/1225] Open-sourcing AddSign and PowerSign optimizers, found in Neural Optimizer Search with Reinforcement Learning [Bello et al, ICML2017] PiperOrigin-RevId: 176142062 --- tensorflow/contrib/opt/BUILD | 51 ++++ tensorflow/contrib/opt/__init__.py | 5 +- .../contrib/opt/python/training/addsign.py | 169 +++++++++++ .../opt/python/training/addsign_test.py | 262 +++++++++++++++++ .../contrib/opt/python/training/powersign.py | 173 +++++++++++ .../opt/python/training/powersign_test.py | 268 ++++++++++++++++++ .../contrib/opt/python/training/sign_decay.py | 158 +++++++++++ .../opt/python/training/sign_decay_test.py | 110 +++++++ tensorflow/core/kernels/training_ops.cc | 250 +++++++++++++++- tensorflow/core/kernels/training_ops.h | 23 ++ .../core/kernels/training_ops_gpu.cu.cc | 74 +++++ tensorflow/core/kernels/training_ops_test.cc | 74 +++++ tensorflow/core/ops/training_ops.cc | 137 +++++++++ tensorflow/core/ops/training_ops_test.cc | 34 +++ 14 files changed, 1786 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/opt/python/training/addsign.py create mode 100644 tensorflow/contrib/opt/python/training/addsign_test.py create mode 100644 tensorflow/contrib/opt/python/training/powersign.py create mode 100644 tensorflow/contrib/opt/python/training/powersign_test.py create mode 100644 tensorflow/contrib/opt/python/training/sign_decay.py create mode 100644 tensorflow/contrib/opt/python/training/sign_decay_test.py diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 096d2270e4..8c46becf2c 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -14,11 +14,14 @@ py_library( name = "opt_py", srcs = [ "__init__.py", + "python/training/addsign.py", "python/training/drop_stale_gradient_optimizer.py", "python/training/external_optimizer.py", "python/training/lazy_adam_optimizer.py", "python/training/moving_average_optimizer.py", 
"python/training/nadam_optimizer.py", + "python/training/powersign.py", + "python/training/sign_decay.py", "python/training/variable_clipping_optimizer.py", ], srcs_version = "PY2AND3", @@ -152,6 +155,54 @@ tf_py_test( ], ) +py_test( + name = "sign_decay_test", + srcs = ["python/training/sign_decay_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:client_testlib", + ], +) + +py_test( + name = "addsign_test", + srcs = ["python/training/addsign_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:session", + "//tensorflow/python:variables", + "//third_party/py/numpy", + ], +) + +py_test( + name = "powersign_test", + srcs = ["python/training/powersign_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":opt_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:session", + "//tensorflow/python:variables", + "//third_party/py/numpy", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index e194fa2d4d..caf22536bb 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -19,12 +19,14 @@ from __future__ import division from __future__ import print_function # pylint: disable=wildcard-import +from tensorflow.contrib.opt.python.training.addsign import * from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import * from 
tensorflow.contrib.opt.python.training.external_optimizer import * from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import * -from tensorflow.contrib.opt.python.training.nadam_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * from tensorflow.contrib.opt.python.training.nadam_optimizer import * +from tensorflow.contrib.opt.python.training.nadam_optimizer import * +from tensorflow.contrib.opt.python.training.powersign import * from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import * # pylint: enable=wildcard-import @@ -32,6 +34,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ + 'PowerSignOptimizer', 'AddSignOptimizer' 'DelayCompensatedGradientDescentOptimizer', 'DropStaleGradientOptimizer', 'ExternalOptimizerInterface', 'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer', diff --git a/tensorflow/contrib/opt/python/training/addsign.py b/tensorflow/contrib/opt/python/training/addsign.py new file mode 100644 index 0000000000..729e59cb0a --- /dev/null +++ b/tensorflow/contrib/opt/python/training/addsign.py @@ -0,0 +1,169 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Implementation of AddSign.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import optimizer +from tensorflow.python.training import training_ops + + +class AddSignOptimizer(optimizer.Optimizer): + """Optimizer that implements the AddSign update. + + See Neural Optimizer Search with Reinforcement Learning + [Bello et al., ICML2017]. + """ + + def __init__(self, + learning_rate=0.1, + alpha=1.0, + beta=0.9, + sign_decay_fn=None, + use_locking=False, + name='AddSignOptimizer'): + """Constructs a new AddSignOptimizer object. + + Initialization: + ``` + m_0 <- 0 (Initialize initial 1st moment vector) + t <- 0 (Initialize timestep) + ``` + + Update: + + ``` + t <- t + 1 + m_t <- beta * m_{t-1} + (1 - beta) * g + sign_decay <- sign_decay(t) + update <- (alpha + sign_decay * sign(g) * sign(m)) * g + variable <- variable - lr_t * update + ``` + + Example for AddSign-ld (AddSign with linear sign decay) + ``` + decay_steps = 1000 + linear_decay_fn = sign_decay.get_linear_decay_fn(decay_steps) + opt = AddSignOptimizer(learning_rate=0.1, sign_decay_fn=linear_decay_fn) + ``` + + Args: + learning_rate: learning_rate used when taking a step. + alpha: alpha used in optimizer. + beta: decay used for computing the moving average m. + sign_decay_fn: decay function applied to the sign(g*m) quantity. + Takes global_step as an argument and returns the quantity to multiply + the sign(g*m) by. + compute (1.0 + alpha * decay * sign(g) * sign(m)) * m. + use_locking: If True use locks for update operations. + name: Optional name for the operations created when applying gradients.
+ Defaults to "AddSignOptimizer". + """ + super(AddSignOptimizer, self).__init__(use_locking, name) + self._lr = learning_rate + self._alpha = alpha + self._beta = beta + + self._sign_decay_fn = sign_decay_fn + + # Tensor versions of the constructor arguments, created in _prepare(). + self._lr_t = None + self._alpha_t = None + self._beta_t = None + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + if self._sign_decay_fn is not None: + self._sign_decay_t = ops.convert_to_tensor( + self._sign_decay_fn(global_step), name='sign_decay') + return super(AddSignOptimizer, self).apply_gradients( + grads_and_vars, global_step=global_step, name=name) + + def _create_slots(self, var_list): + # Create slots for the first moment. + for v in var_list: + self._zeros_slot(v, 'm', self._name) + + def _prepare(self): + self._lr_t = ops.convert_to_tensor(self._lr, name='learning_rate') + self._beta_t = ops.convert_to_tensor(self._beta, name='beta') + self._alpha_t = ops.convert_to_tensor(self._alpha, name='alpha') + if self._sign_decay_fn is None: + self._sign_decay_t = ops.convert_to_tensor(1.0, name='sign_decay') + + def _apply_dense(self, grad, var): + m = self.get_slot(var, 'm') + return training_ops.apply_add_sign( + var, + m, + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._alpha_t, var.dtype.base_dtype), + math_ops.cast(self._sign_decay_t, var.dtype.base_dtype), + math_ops.cast(self._beta_t, var.dtype.base_dtype), + grad, + use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, var): + m = self.get_slot(var, 'm') + return training_ops.resource_apply_add_sign( + var.handle, + m.handle, + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._alpha_t, var.dtype.base_dtype), + math_ops.cast(self._sign_decay_t, var.dtype.base_dtype), + math_ops.cast(self._beta_t, var.dtype.base_dtype), + grad, + use_locking=self._use_locking) + + def _apply_sparse(self, grad, var): + lr_t = 
math_ops.cast(self._lr_t, var.dtype.base_dtype) + alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype) + beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype) + + m = self.get_slot(var, 'm') + m_t = state_ops.assign( + m, (m * beta_t) + (grad * (1 - beta_t)), use_locking=self._use_locking) + + sign_g = ops.IndexedSlices( + math_ops.sign(grad.values), grad.indices, dense_shape=grad.dense_shape) + sign_gm = ops.IndexedSlices( + array_ops.gather(math_ops.sign(m_t), sign_g.indices) * sign_g.values, + sign_g.indices, + dense_shape=sign_g.dense_shape) + + sign_decayed = math_ops.cast( + self._sign_decay_t, var.dtype.base_dtype) + multiplier_values = alpha_t + sign_decayed * sign_gm.values + multiplier = ops.IndexedSlices( + multiplier_values, sign_gm.indices, dense_shape=sign_gm.dense_shape) + + final_update = ops.IndexedSlices( + lr_t * multiplier.values * grad.values, + multiplier.indices, + dense_shape=multiplier.dense_shape) + + var_update = state_ops.scatter_sub( + var, + final_update.indices, + final_update.values, + use_locking=self._use_locking) + + return control_flow_ops.group(* [var_update, m_t]) diff --git a/tensorflow/contrib/opt/python/training/addsign_test.py b/tensorflow/contrib/opt/python/training/addsign_test.py new file mode 100644 index 0000000000..bd19ee3e7a --- /dev/null +++ b/tensorflow/contrib/opt/python/training/addsign_test.py @@ -0,0 +1,262 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for AddSign.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.opt.python.training import addsign +from tensorflow.contrib.opt.python.training import sign_decay +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def py_linear_decay_fn(decay_steps): + def linear_decay(step): + step = min(step, decay_steps) + return float(decay_steps - step) / decay_steps + return linear_decay + + +def addsign_update_numpy(params, + g_t, + m, + lr, + alpha=1.0, + beta=0.9, + py_sign_decay_fn=None, + t=None): + m_t = beta * m + (1 - beta) * g_t + if py_sign_decay_fn is None: + sign_decayed = 1.0 + else: + sign_decayed = py_sign_decay_fn(t-1) + multiplier = alpha + sign_decayed * np.sign(g_t) * np.sign(m_t) + params_t = params - lr * multiplier * g_t + return params_t, m_t + + +class AddSignTest(test.TestCase): + + def _testDense(self, + use_resource=False, + learning_rate=0.1, + sign_decay_fn=None, + py_sign_decay_fn=None, + alpha=1.0, + beta=0.9): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(use_gpu=True): + # Initialize variables for numpy implementation. 
+ m0, m1 = 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + global_step = resource_variable_ops.ResourceVariable( + 0, trainable=False) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + global_step = variables.Variable( + 0, trainable=False) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + opt = addsign.AddSignOptimizer( + learning_rate=learning_rate, + alpha=alpha, + beta=beta, + sign_decay_fn=sign_decay_fn, + ) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 7 steps of AddSign + # first 4 steps with positive gradient + # last 3 steps with negative gradient (sign(gm) should be -1) + for t in range(1, 8): + if t < 5: + if context.in_graph_mode(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + else: + if context.in_graph_mode(): + self.evaluate(neg_update) + elif t > 1: + opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + + var0_np, m0 = addsign_update_numpy( + var0_np, + grads0_np if t < 5 else -grads0_np, + m0, + learning_rate, + alpha=alpha, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + var1_np, m1 = addsign_update_numpy( + 
var1_np, + grads1_np if t < 5 else -grads1_np, + m1, + learning_rate, + alpha=alpha, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + def testDense(self): + decay_steps = 10 + sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps) + py_sign_decay_fn = py_linear_decay_fn(decay_steps) + self._testDense(use_resource=False) + self._testDense(use_resource=False, learning_rate=0.01, alpha=0.1, beta=0.8) + self._testDense(use_resource=False, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + + self._testDense(use_resource=True) + self._testDense(use_resource=True, learning_rate=0.01, alpha=0.1, beta=0.8) + self._testDense(use_resource=True, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + + def _testSparse(self, + use_resource=False, + learning_rate=0.1, + sign_decay_fn=None, + py_sign_decay_fn=None, + alpha=1.0, + beta=0.9): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(use_gpu=True): + # Initialize variables for numpy implementation. 
+ m0, m1 = 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + global_step = resource_variable_ops.ResourceVariable( + 0, trainable=False) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + global_step = variables.Variable( + 0, trainable=False) + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), constant_op.constant([2])) + grads1_np_indices = np.array([0, 1], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), constant_op.constant([2])) + opt = addsign.AddSignOptimizer( + learning_rate=learning_rate, + alpha=alpha, + beta=beta, + sign_decay_fn=sign_decay_fn, + ) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 7 steps of AddSign + # first 4 steps with positive gradient + # last 3 steps with negative gradient (sign(gm) should be -1) + for t in range(1, 4): + if t < 5: + update.run() + else: + neg_update.run() + + var0_np, m0 = addsign_update_numpy( + var0_np, + grads0_np, + m0, + learning_rate, + alpha=alpha, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + var1_np, m1 = addsign_update_numpy( + var1_np, + grads1_np, + m1, + learning_rate, + alpha=alpha, + 
beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSparse(self): + decay_steps = 10 + sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps) + py_sign_decay_fn = py_linear_decay_fn(decay_steps) + self._testSparse(use_resource=False) + self._testSparse(use_resource=False, + learning_rate=0.01, + alpha=0.1, + beta=0.8) + self._testSparse(use_resource=False, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/opt/python/training/powersign.py b/tensorflow/contrib/opt/python/training/powersign.py new file mode 100644 index 0000000000..7f7521581f --- /dev/null +++ b/tensorflow/contrib/opt/python/training/powersign.py @@ -0,0 +1,173 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Implementation of PowerSign.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.training import optimizer +from tensorflow.python.training import training_ops + + +class PowerSignOptimizer(optimizer.Optimizer): + """Optimizer that implements the PowerSign update. + + See Neural Optimizer Search with Reinforcement Learning + [Bello et al., ICML2017]. + """ + + def __init__(self, + learning_rate=0.1, + base=math.e, + beta=0.9, + sign_decay_fn=None, + use_locking=False, + name='PowerSignOptimizer'): + """Constructs a new PowerSignOptimizer object. + + Initialization: + + ``` + m_0 <- 0 (Initialize initial 1st moment vector) + t <- 0 (Initialize timestep) + ``` + + Update: + + ``` + t <- t + 1 + m_t <- beta * m_{t-1} + (1 - beta) * g + sign_decay <- sign_decay(t) + update <- base ** (sign_decay * sign(g) * sign(m)) * g + variable <- variable - lr_t * update + ``` + + Example usage for PowerSign-ld (PowerSign with linear sign decay) + ``` + decay_steps = 1000 + linear_decay_fn = sign_decay.get_linear_decay_fn(decay_steps) + opt = PowerSignOptimizer(learning_rate=0.1, sign_decay_fn=linear_decay_fn) + ``` + + Args: + learning_rate: learning_rate used when taking a step. + base: base used in optimizer. + beta: decay used for computing the moving average m. + sign_decay_fn: decay function applied to the sign(g*m) quantity. + Takes global_step as an argument and returns the quantity to multiply + the sign(g*m) by. + use_locking: If True use locks for update operations. + name: Optional name for the operations created when applying gradients.
+ Defaults to "PowerSignOptimizer". + """ + super(PowerSignOptimizer, self).__init__(use_locking, name) + self._lr = learning_rate + self._beta = beta + self._logbase = math.log(base) + + self._sign_decay_fn = sign_decay_fn + + # Tensor versions of the constructor arguments, created in _prepare(). + self._lr_t = None + self._beta_t = None + self._logbase_t = None + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + if self._sign_decay_fn is not None: + self._sign_decay_t = ops.convert_to_tensor( + self._sign_decay_fn(global_step), name='sign_decay') + return super(PowerSignOptimizer, self).apply_gradients( + grads_and_vars, global_step=global_step, name=name) + + def _create_slots(self, var_list): + # Create slots for the first moment. + for v in var_list: + self._zeros_slot(v, 'm', self._name) + + def _prepare(self): + self._lr_t = ops.convert_to_tensor(self._lr, name='learning_rate') + self._beta_t = ops.convert_to_tensor(self._beta, name='beta') + self._logbase_t = ops.convert_to_tensor(self._logbase, name='logbase') + if self._sign_decay_fn is None: + self._sign_decay_t = ops.convert_to_tensor(1.0, name='sign_decay') + + def _apply_dense(self, grad, var): + m = self.get_slot(var, 'm') + return training_ops.apply_power_sign( + var, + m, + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._logbase_t, var.dtype.base_dtype), + math_ops.cast(self._sign_decay_t, var.dtype.base_dtype), + math_ops.cast(self._beta_t, var.dtype.base_dtype), + grad, + use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, var): + m = self.get_slot(var, 'm') + return training_ops.resource_apply_power_sign( + var.handle, + m.handle, + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._logbase_t, var.dtype.base_dtype), + math_ops.cast(self._sign_decay_t, var.dtype.base_dtype), + math_ops.cast(self._beta_t, var.dtype.base_dtype), + grad, + use_locking=self._use_locking) + + def _apply_sparse(self, grad, 
var): + lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) + beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype) + logbase_t = math_ops.cast(self._logbase_t, var.dtype.base_dtype) + e_t = math_ops.cast(math.e, var.dtype.base_dtype) + + m = self.get_slot(var, 'm') + m_t = state_ops.assign( + m, (m * beta_t) + (grad * (1 - beta_t)), use_locking=self._use_locking) + + sign_g = ops.IndexedSlices( + math_ops.sign(grad.values), grad.indices, dense_shape=grad.dense_shape) + sign_gm = ops.IndexedSlices( + array_ops.gather(math_ops.sign(m_t), sign_g.indices) * sign_g.values, + sign_g.indices, + dense_shape=sign_g.dense_shape) + + sign_decayed = math_ops.cast( + self._sign_decay_t, var.dtype.base_dtype) + multiplier_values = math_ops.pow( + e_t, logbase_t * sign_decayed * sign_gm.values) + multiplier = ops.IndexedSlices( + multiplier_values, sign_gm.indices, dense_shape=sign_gm.dense_shape) + + final_update = ops.IndexedSlices( + lr_t * multiplier.values * grad.values, + multiplier.indices, + dense_shape=multiplier.dense_shape) + + var_update = state_ops.scatter_sub( + var, + final_update.indices, + final_update.values, + use_locking=self._use_locking) + + return control_flow_ops.group(* [var_update, m_t]) diff --git a/tensorflow/contrib/opt/python/training/powersign_test.py b/tensorflow/contrib/opt/python/training/powersign_test.py new file mode 100644 index 0000000000..ff7b1a72d4 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/powersign_test.py @@ -0,0 +1,268 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for PowerSign.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +import numpy as np + +from tensorflow.contrib.opt.python.training import powersign +from tensorflow.contrib.opt.python.training import sign_decay +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def py_linear_decay_fn(decay_steps): + def linear_decay(step): + step = min(step, decay_steps) + return float(decay_steps - step) / decay_steps + return linear_decay + + +def powersign_update_numpy(params, + g_t, + m, + lr, + base=math.e, + beta=0.9, + py_sign_decay_fn=None, + t=None): + m_t = beta * m + (1 - beta) * g_t + if py_sign_decay_fn is None: + sign_decayed = 1.0 + else: + sign_decayed = py_sign_decay_fn(t-1) + multiplier = base ** (sign_decayed * np.sign(g_t) * np.sign(m_t)) + params_t = params - lr * multiplier * g_t + return params_t, m_t + + +class PowerSignTest(test.TestCase): + + def _testDense(self, + use_resource=False, + learning_rate=0.1, + sign_decay_fn=None, + py_sign_decay_fn=None, + base=math.e, + beta=0.9): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(use_gpu=True): + # 
Initialize variables for numpy implementation. + m0, m1 = 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + global_step = resource_variable_ops.ResourceVariable( + 0, trainable=False) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + global_step = variables.Variable( + 0, trainable=False) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + opt = powersign.PowerSignOptimizer( + learning_rate=learning_rate, + base=base, + beta=beta, + sign_decay_fn=sign_decay_fn, + ) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 7 steps of powersign + # first 4 steps with positive gradient + # last 3 steps with negative gradient (sign(gm) should be -1) + for t in range(1, 8): + if t < 5: + if context.in_graph_mode(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + else: + if context.in_graph_mode(): + self.evaluate(neg_update) + elif t > 1: + opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + + var0_np, m0 = powersign_update_numpy( + var0_np, + grads0_np if t < 5 else -grads0_np, + m0, + learning_rate, + base=base, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, 
+ t=t, + ) + var1_np, m1 = powersign_update_numpy( + var1_np, + grads1_np if t < 5 else -grads1_np, + m1, + learning_rate, + base=base, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + + def testDense(self): + decay_steps = 10 + sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps) + py_sign_decay_fn = py_linear_decay_fn(decay_steps) + self._testDense(use_resource=False) + self._testDense(use_resource=False, + learning_rate=0.1, + base=10.0, + beta=0.8) + self._testDense(use_resource=False, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + + self._testDense(use_resource=True) + self._testDense(use_resource=True, learning_rate=0.1, base=10.0, beta=0.8) + self._testDense(use_resource=True, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + + def _testSparse(self, + use_resource=False, + learning_rate=0.1, + sign_decay_fn=None, + py_sign_decay_fn=None, + base=math.e, + beta=0.9): + with self.test_session(use_gpu=True): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + # Initialize variables for numpy implementation. 
+ m0, m1 = 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + global_step = resource_variable_ops.ResourceVariable( + 0, trainable=False) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + global_step = variables.Variable( + 0, trainable=False) + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), constant_op.constant([2])) + grads1_np_indices = np.array([0, 1], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), constant_op.constant([2])) + opt = powersign.PowerSignOptimizer( + learning_rate=learning_rate, + base=base, + beta=beta, + sign_decay_fn=sign_decay_fn, + ) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]), + global_step=global_step) + neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), + global_step=global_step) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 7 steps of powersign + # first 4 steps with positive gradient + # last 3 steps with negative gradient (sign(gm) should be -1) + for t in range(1, 8): + if t < 5: + update.run() + else: + neg_update.run() + + var0_np, m0 = powersign_update_numpy( + var0_np, + grads0_np if t < 5 else -grads0_np, + m0, + learning_rate, + base=base, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + var1_np, m1 = powersign_update_numpy( + var1_np, + grads1_np if t < 5 else
-grads1_np, + m1, + learning_rate, + base=base, + beta=beta, + py_sign_decay_fn=py_sign_decay_fn, + t=t, + ) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSparse(self): + decay_steps = 10 + sign_decay_fn = sign_decay.get_linear_decay_fn(decay_steps) + py_sign_decay_fn = py_linear_decay_fn(decay_steps) + self._testSparse(use_resource=False) + self._testSparse(use_resource=False, + learning_rate=0.01, + base=2.0, + beta=0.8) + self._testSparse(use_resource=False, + sign_decay_fn=sign_decay_fn, + py_sign_decay_fn=py_sign_decay_fn) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/opt/python/training/sign_decay.py b/tensorflow/contrib/opt/python/training/sign_decay.py new file mode 100644 index 0000000000..e8870c0721 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/sign_decay.py @@ -0,0 +1,158 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of the sign decay functions used in PowerSign and AddSign. + +See [Bello et al., ICML 2017] Neural Optimizer Search with Reinforcement +Learning for details. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from tensorflow.python.framework import constant_op +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +def get_linear_decay_fn(decay_steps): + """Returns a function that computes a linear decay. + + This decay computes linear annealing: + max(0, (decay_steps - global_step) / decay_steps) + + Example usage: + ``` + decay_steps = 1000 + linear_decay_fn = get_linear_decay_fn(decay_steps) + decayed = linear_decay_fn(global_step) + x *= decayed + ``` + Args: + decay_steps: number of steps to decay over. + Returns: + linear_decay_fn: a function that computes the linear decay. + """ + # pylint:disable=missing-docstring + def linear_decay_fn(global_step): + if global_step is None: + raise ValueError("global_step is required for linear_decay.") + global_step = math_ops.minimum(global_step, decay_steps) + remaining_steps = math_ops.to_int32(decay_steps) - math_ops.to_int32( + global_step) + decayed = math_ops.to_float(remaining_steps) / math_ops.to_float( + decay_steps) + return math_ops.maximum(0.0, decayed) + # pylint:enable=missing-docstring + return linear_decay_fn + + +def get_cosine_decay_fn(decay_steps, num_periods=0.5, zero_after=None): + """Returns a function that computes a cosine decay. + + This decay computes cosine annealing: + 0.5 * (1.0 + cos(2.0 * pi * num_periods * global_step / decay_steps)) + + This decay can be used to decay the sign quantity in the AddSign and PowerSign + optimizers discovered in + [Bello et al., ICML 2017] Neural Optimizer Search with RL. + + Example usage: + ``` + decay_steps = 1000 + num_periods = 2 + cosine_decay_fn = get_cosine_decay_fn(decay_steps, num_periods=num_periods) + decayed = cosine_decay_fn(global_step) + x *= decayed + ``` + Args: + decay_steps: number of steps to decay over. + num_periods: number of periods for cosine signal. 
0.5 by default, + which maps the last decay step to 0. + zero_after: if not None, number after which the decay function + will just return 0. + Returns: + cosine_decay_fn: a function that computes the cosine decay. + """ + # pylint:disable=missing-docstring + def cosine_decay_fn(global_step): + if global_step is None: + raise ValueError("global_step is required for cosine_decay.") + global_step = math_ops.minimum(global_step, decay_steps) + completed_fraction = math_ops.to_float(global_step) / math_ops.to_float( + decay_steps) + fraction = 2.0 * num_periods * completed_fraction + decayed = 0.5 * ( + 1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction)) + if zero_after is not None: + decayed = array_ops.where( + math_ops.greater_equal(fraction, 2 * zero_after), 0.0, decayed) + return decayed + # pylint:enable=missing-docstring + return cosine_decay_fn + + +def get_restart_decay_fn(decay_steps, num_periods=1, zero_after=None): + """Returns a function that computes a restart decay. + + This decay computes + 0.5 * (1.0 + cos(pi * ((num_periods * global_step) % decay_steps) / decay_steps)) + + This is a simplified version of the restart decay introduced in + "SGDR: Stochastic Gradient Descent with Warm Restarts" + by Ilya Loshchilov & Frank Hutter, Proceedings of + ICLR'2017, available at https://arxiv.org/pdf/1608.03983.pdf + + This decay can be used to decay the sign quantity in the AddSign and PowerSign + optimizers discovered in + [Bello et al., ICML 2017] Neural Optimizer Search with RL. + + Example usage: + ``` + decay_steps = 1000 + num_periods = 2.0 + restart_decay_fn = get_restart_decay_fn(decay_steps, + num_periods=num_periods) + decayed = restart_decay_fn(global_step) + x *= decayed + ``` + Args: + decay_steps: number of steps to decay over. + num_periods: number of periods for cosine signal. 1 by default, + which maps the last decay step to 0. + zero_after: if not None, number after which the decay function + will return 0.
+ Returns: + restart_decay_fn: a function that computes the restart decay. + """ + # pylint:disable=missing-docstring + def restart_decay_fn(global_step): + if global_step is None: + raise ValueError("global_step is required for cosine_decay.") + global_step = math_ops.minimum(global_step, decay_steps) + num = math_ops.mod(num_periods * math_ops.to_float(global_step), + decay_steps) + fraction = num / math_ops.to_float(decay_steps) + decayed = 0.5 * ( + 1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction)) + if zero_after is not None: + tmp = math_ops.to_float( + num_periods * global_step) / math_ops.to_float(decay_steps) + decayed = array_ops.where( + math_ops.greater_equal(tmp, zero_after), 0.0, decayed) + return decayed + # pylint:enable=missing-docstring + return restart_decay_fn diff --git a/tensorflow/contrib/opt/python/training/sign_decay_test.py b/tensorflow/contrib/opt/python/training/sign_decay_test.py new file mode 100644 index 0000000000..c31cb924ea --- /dev/null +++ b/tensorflow/contrib/opt/python/training/sign_decay_test.py @@ -0,0 +1,110 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for sign_decay.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +from tensorflow.contrib.opt.python.training import sign_decay +from tensorflow.python.platform import test + + +def py_linear_decay_fn(decay_steps): + + def linear_decay(step): + step = min(step, decay_steps) + return float(decay_steps - step) / decay_steps + + return linear_decay + + +def py_cosine_decay_fn(decay_steps, num_periods=0.5, zero_after=None): + + def cosine_decay(step): + step = min(step, decay_steps) + fraction = 2.0 * num_periods * step / float(decay_steps) + if zero_after is not None and fraction >= 2 * zero_after: + return 0.0 + return 0.5 * (1.0 + math.cos(math.pi * fraction)) + + return cosine_decay + + +def py_restart_decay_fn(decay_steps, num_periods=1, zero_after=None): + + def restart_decay(step): + step = min(step, decay_steps) + tmp = num_periods * step / float(decay_steps) + fraction = ( + num_periods * step % decay_steps) / float(decay_steps) + if zero_after is not None and tmp >= zero_after: + return 0 + return 0.5 * (1.0 + math.cos(math.pi * fraction)) + + return restart_decay + + +class SignDecaysTest(test.TestCase): + + def testLinearDecay(self): + num_training_steps = 1000 + linear_decay_fn = sign_decay.get_linear_decay_fn(num_training_steps) + + for step in range(0, 1000, 100): + with self.test_session(): + tf_decayed = linear_decay_fn(step).eval() + py_decayed = py_linear_decay_fn(num_training_steps)(step) + self.assertAlmostEqual(tf_decayed, py_decayed, places=4) + + def testCosineDecay(self): + num_training_steps = 1000 + cosine_decay_fn = sign_decay.get_cosine_decay_fn(num_training_steps) + cosine_decay_2_fn = sign_decay.get_cosine_decay_fn( + num_training_steps, num_periods=5, zero_after=2) + + for step in range(0, 1000, 100): + with self.test_session(): + tf_decayed = 
cosine_decay_fn(step).eval() + py_decayed = py_cosine_decay_fn(num_training_steps)(step) + self.assertAlmostEqual(tf_decayed, py_decayed, places=4) + + tf_decayed = cosine_decay_2_fn(step).eval() + py_decayed = py_cosine_decay_fn( + num_training_steps, num_periods=5, zero_after=2)(step) + self.assertAlmostEqual(tf_decayed, py_decayed, places=4) + + def testRestartDecay(self): + num_training_steps = 1000 + restart_decay_fn = sign_decay.get_restart_decay_fn(num_training_steps) + restart_decay_2_fn = sign_decay.get_restart_decay_fn( + num_training_steps, num_periods=5, zero_after=2) + + for step in range(0, 1000, 100): + with self.test_session(): + tf_decayed = restart_decay_fn(step).eval() + py_decayed = py_restart_decay_fn(num_training_steps)(step) + self.assertAlmostEqual(tf_decayed, py_decayed, places=4) + + tf_decayed = restart_decay_2_fn(step).eval() + py_decayed = py_restart_decay_fn( + num_training_steps, num_periods=5, zero_after=2)(step) + self.assertAlmostEqual(tf_decayed, py_decayed, places=4) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 98dfa5a3dd..76c30c5a46 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -15,12 +15,13 @@ limitations under the License. 
#define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/training_ops.h" #include + #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/kernels/training_op_helpers.h" +#include "tensorflow/core/kernels/training_ops.h" #include "tensorflow/core/kernels/variable_ops.h" #ifdef TENSORFLOW_USE_SYCL @@ -361,6 +362,37 @@ struct ApplyCenteredRMSProp { } }; +template +struct ApplyAddSign { + void operator()(const CPUDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar alpha, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad) { + m.device(d) = m * beta() + grad * (static_cast(1) - beta()); + auto sign_gm = grad.sign() * m.sign(); + var.device(d) -= lr() * (alpha() + sign_decay() * sign_gm) * grad; + } +}; + +template +struct ApplyPowerSign { + void operator()(const CPUDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar logbase, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad) { + m.device(d) = m * beta() + grad * (static_cast(1) - beta()); + auto sign_gm = grad.sign() * m.sign(); + auto grad_scale = (logbase() * sign_decay() * sign_gm).exp(); + var.device(d) -= lr() * grad_scale * grad; + } +}; + } // namespace functor template @@ -3243,4 +3275,220 @@ REGISTER_KERNELS(double, int64); #undef REGISTER_KERNELS + +template +class ApplyAddSignOp : public OpKernel { + public: + explicit ApplyAddSignOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); + } + + void Compute(OpKernelContext* ctx) override { + auto locks = + MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1}); + + Tensor 
var; + OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( + ctx, 0, use_exclusive_lock_, false, &var)); + Tensor m; + OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( + ctx, 1, use_exclusive_lock_, false, &m)); + OP_REQUIRES( + ctx, var.IsInitialized(), + errors::FailedPrecondition( + "Attempting to use uninitialized variables: ", requested_input(0))); + OP_REQUIRES( + ctx, m.IsInitialized(), + errors::FailedPrecondition( + "Attempting to use uninitialized variables: ", requested_input(1))); + const Tensor& lr = ctx->input(2); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()), + errors::InvalidArgument("lr is not a scalar: ", + lr.shape().DebugString())); + const Tensor& alpha = ctx->input(3); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(alpha.shape()), + errors::InvalidArgument("alpha is not a scalar: ", + alpha.shape().DebugString())); + const Tensor& sign_decay = ctx->input(4); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(sign_decay.shape()), + errors::InvalidArgument("sign_decay is not a scalar: ", + sign_decay.shape().DebugString())); + const Tensor& beta = ctx->input(5); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta.shape()), + errors::InvalidArgument("beta is not a scalar: ", + beta.shape().DebugString())); + const Tensor& grad = ctx->input(6); + OP_REQUIRES(ctx, var.shape().IsSameSize(m.shape()), + errors::InvalidArgument("var and m do not have the same shape", + var.shape().DebugString(), " ", + m.shape().DebugString())); + OP_REQUIRES( + ctx, var.shape().IsSameSize(grad.shape()), + errors::InvalidArgument("var and grad do not have the same shape", + var.shape().DebugString(), " ", + grad.shape().DebugString())); + + const Device& device = ctx->template eigen_device(); + functor::ApplyAddSign()( + device, var.flat(), m.flat(), lr.scalar(), alpha.scalar(), + sign_decay.scalar(), beta.scalar(), grad.flat()); + MaybeForwardRefInputToRefOutput(ctx, 0, 0); + } + + private: + bool use_exclusive_lock_; +}; + +#define REGISTER_KERNELS(D, T) \ + 
REGISTER_KERNEL_BUILDER( \ + Name("ApplyAddSign").Device(DEVICE_##D).TypeConstraint("T"), \ + ApplyAddSignOp); \ + REGISTER_KERNEL_BUILDER(Name("ResourceApplyAddSign") \ + .Device(DEVICE_##D) \ + .HostMemory("var") \ + .HostMemory("m") \ + .TypeConstraint("T"), \ + ApplyAddSignOp); +#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); + +TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_float(REGISTER_CPU_KERNELS); +TF_CALL_double(REGISTER_CPU_KERNELS); + +#if GOOGLE_CUDA +// Forward declarations of the functor specializations for GPU. +namespace functor { +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void ApplyAddSign::operator()( \ + const GPUDevice& d, \ + typename TTypes::Flat var, \ + typename TTypes::Flat m, \ + typename TTypes::ConstScalar lr, \ + typename TTypes::ConstScalar alpha, \ + typename TTypes::ConstScalar sign_decay, \ + typename TTypes::ConstScalar beta, \ + typename TTypes::ConstFlat grad); \ + extern template struct ApplyAddSign; +DECLARE_GPU_SPEC(Eigen::half); +DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(double); +#undef DECLARE_GPU_SPEC +} // namespace functor + +REGISTER_KERNELS(GPU, Eigen::half); +REGISTER_KERNELS(GPU, float); +REGISTER_KERNELS(GPU, double); +#endif +#undef REGISTER_CPU_KERNELS +#undef REGISTER_KERNELS + + +template +class ApplyPowerSignOp : public OpKernel { + public: + explicit ApplyPowerSignOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); + } + + void Compute(OpKernelContext* ctx) override { + auto locks = + MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1}); + + Tensor var; + OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( + ctx, 0, use_exclusive_lock_, false, &var)); + Tensor m; + OP_REQUIRES_OK(ctx, GetInputTensorFromVariable( + ctx, 1, use_exclusive_lock_, false, &m)); + OP_REQUIRES( + ctx, var.IsInitialized(), + errors::FailedPrecondition( + "Attempting to use uninitialized variables: ", requested_input(0))); + OP_REQUIRES( 
+ ctx, m.IsInitialized(), + errors::FailedPrecondition( + "Attempting to use uninitialized variables: ", requested_input(1))); + const Tensor& lr = ctx->input(2); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr.shape()), + errors::InvalidArgument("lr is not a scalar: ", + lr.shape().DebugString())); + const Tensor& logbase = ctx->input(3); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(logbase.shape()), + errors::InvalidArgument("logbase is not a scalar: ", + logbase.shape().DebugString())); + const Tensor& sign_decay = ctx->input(4); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(sign_decay.shape()), + errors::InvalidArgument("sign_decay is not a scalar: ", + sign_decay.shape().DebugString())); + const Tensor& beta = ctx->input(5); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(beta.shape()), + errors::InvalidArgument("beta is not a scalar: ", + beta.shape().DebugString())); + const Tensor& grad = ctx->input(6); + OP_REQUIRES(ctx, var.shape().IsSameSize(m.shape()), + errors::InvalidArgument("var and m do not have the same shape", + var.shape().DebugString(), " ", + m.shape().DebugString())); + OP_REQUIRES( + ctx, var.shape().IsSameSize(grad.shape()), + errors::InvalidArgument("var and grad do not have the same shape", + var.shape().DebugString(), " ", + grad.shape().DebugString())); + + const Device& device = ctx->template eigen_device(); + functor::ApplyPowerSign()( + device, var.flat(), m.flat(), lr.scalar(), logbase.scalar(), + sign_decay.scalar(), beta.scalar(), grad.flat()); + MaybeForwardRefInputToRefOutput(ctx, 0, 0); + } + + private: + bool use_exclusive_lock_; +}; + +#define REGISTER_KERNELS(D, T) \ + REGISTER_KERNEL_BUILDER( \ + Name("ApplyPowerSign").Device(DEVICE_##D).TypeConstraint("T"), \ + ApplyPowerSignOp); \ + REGISTER_KERNEL_BUILDER(Name("ResourceApplyPowerSign") \ + .Device(DEVICE_##D) \ + .HostMemory("var") \ + .HostMemory("m") \ + .TypeConstraint("T"), \ + ApplyPowerSignOp); +#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); + 
+TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_float(REGISTER_CPU_KERNELS); +TF_CALL_double(REGISTER_CPU_KERNELS); + +#if GOOGLE_CUDA +// Forward declarations of the functor specializations for GPU. +namespace functor { +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void ApplyPowerSign::operator()( \ + const GPUDevice& d, \ + typename TTypes::Flat var, \ + typename TTypes::Flat m, \ + typename TTypes::ConstScalar lr, \ + typename TTypes::ConstScalar logbase, \ + typename TTypes::ConstScalar sign_decay, \ + typename TTypes::ConstScalar beta, \ + typename TTypes::ConstFlat grad); \ + extern template struct ApplyPowerSign; +DECLARE_GPU_SPEC(Eigen::half); +DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(double); +#undef DECLARE_GPU_SPEC +} // namespace functor + +REGISTER_KERNELS(GPU, Eigen::half); +REGISTER_KERNELS(GPU, float); +REGISTER_KERNELS(GPU, double); +#endif +#undef REGISTER_CPU_KERNELS +#undef REGISTER_KERNELS + } // namespace tensorflow diff --git a/tensorflow/core/kernels/training_ops.h b/tensorflow/core/kernels/training_ops.h index 99a714e0a2..7ee956053a 100644 --- a/tensorflow/core/kernels/training_ops.h +++ b/tensorflow/core/kernels/training_ops.h @@ -161,6 +161,29 @@ struct ApplyCenteredRMSProp { typename TTypes::ConstScalar epsilon, typename TTypes::ConstFlat grad); }; + +template +struct ApplyAddSign { + void operator()(const Device& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar alpha, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad); +}; + +template +struct ApplyPowerSign { + void operator()(const Device& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar logbase, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad); +}; + } // end namespace functor } // end namespace tensorflow 
diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index 3678b96e98..f501161095 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -193,6 +193,71 @@ struct ApplyCenteredRMSProp { } }; +template +struct ApplyAddSign { + void operator()(const GPUDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar alpha, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad) { + Eigen::array::Tensor::Index, 1> bcast; + bcast[0] = grad.dimension(0); + Eigen::Sizes<1> single; + + // The following is the GPU equivalent of the CPU version: + // m.device(d) = m * beta() + grad * (static_cast(1) - beta()); + const auto one = static_cast(1.0); + auto beta_bcast = beta.reshape(single).broadcast(bcast); + auto one_minus_beta = + (beta.constant(one) - beta).reshape(single).broadcast(bcast); + m.device(d) = m * beta_bcast + grad * one_minus_beta; + + // The following is the GPU equivalent of the CPU version: + // var.device(d) -= lr() * (alpha() + sign_decay() * sign_gm) * grad; + auto sign_gm = grad.sign() * m.sign(); + auto lr_bcast = lr.reshape(single).broadcast(bcast); + auto alpha_bcast = alpha.reshape(single).broadcast(bcast); + auto sign_decay_bcast = sign_decay.reshape(single).broadcast(bcast); + var.device(d) -= + lr_bcast * (alpha_bcast + sign_decay_bcast * sign_gm) * grad; + } +}; + +template +struct ApplyPowerSign { + void operator()(const GPUDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar logbase, + typename TTypes::ConstScalar sign_decay, + typename TTypes::ConstScalar beta, + typename TTypes::ConstFlat grad) { + Eigen::array::Tensor::Index, 1> bcast; + bcast[0] = grad.dimension(0); + Eigen::Sizes<1> single; + + // The following is the GPU 
equivalent of the CPU version: + // m.device(d) = m * beta() + grad * (static_cast(1) - beta()); + const auto one = static_cast(1.0); + auto beta_bcast = beta.reshape(single).broadcast(bcast); + auto one_minus_beta = + (beta.constant(one) - beta).reshape(single).broadcast(bcast); + m.device(d) = m * beta_bcast + grad * one_minus_beta; + + // The following is the GPU equivalent of the CPU version: + // auto grad_scale = (logbase() * sign_decay() * sign_gm).exp(); + // var.device(d) -= lr() * grad_scale * grad; + auto sign_gm = grad.sign() * m.sign(); + auto lr_bcast = lr.reshape(single).broadcast(bcast); + auto logbase_bcast = logbase.reshape(single).broadcast(bcast); + auto sign_decay_bcast = sign_decay.reshape(single).broadcast(bcast); + auto grad_scale = (logbase_bcast * sign_decay_bcast * sign_gm).exp(); + var.device(d) -= lr_bcast * grad_scale * grad; + } +}; + } // namespace functor template struct functor::ApplyGradientDescent; @@ -222,6 +287,15 @@ template struct functor::ApplyRMSProp; template struct functor::ApplyCenteredRMSProp; template struct functor::ApplyCenteredRMSProp; template struct functor::ApplyCenteredRMSProp; + +template struct functor::ApplyAddSign; +template struct functor::ApplyAddSign; +template struct functor::ApplyAddSign; + +template struct functor::ApplyPowerSign; +template struct functor::ApplyPowerSign; +template struct functor::ApplyPowerSign; + } // end namespace tensorflow #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/training_ops_test.cc b/tensorflow/core/kernels/training_ops_test.cc index 4b1c9eb8bb..ffa7f87c9e 100644 --- a/tensorflow/core/kernels/training_ops_test.cc +++ b/tensorflow/core/kernels/training_ops_test.cc @@ -233,4 +233,78 @@ static void BM_RMSProp(int iters, int params) { } BENCHMARK(BM_RMSProp)->Arg(128 << 10)->Arg(256 << 10); +static void AddSign(int32 n, Graph** init_g, Graph** train_g) { + TensorShape shape({n}); + { + Graph* g = new Graph(OpRegistry::Global()); + auto var = Var(g, n); + auto m = 
Var(g, n); + auto zero = Zeros(g, n); + test::graph::Assign(g, var, zero); + test::graph::Assign(g, m, zero); + *init_g = g; + } + { + Graph* g = new Graph(OpRegistry::Global()); + auto var = Var(g, n); + auto m = Var(g, n); + auto lr = Scalar(g, 0.01); + auto alpha = Scalar(g, 0.1); + auto sign_decay = Scalar(g, 0.9); + auto beta = Scalar(g, 0.8); + auto grad = Random(g, n); + test::graph::Multi(g, "ApplyAddSign", + {var, m, lr, alpha, sign_decay, beta, grad}); + *train_g = g; + } +} + +static void BM_AddSign(int iters, int params) { + const int64 tot = static_cast(iters) * params; + testing::ItemsProcessed(tot); + testing::BytesProcessed(tot * sizeof(float)); + Graph* init; + Graph* train; + AddSign(params, &init, &train); + test::Benchmark("cpu", train, GetOptions(), init).Run(iters); +} +BENCHMARK(BM_AddSign)->Arg(128 << 10)->Arg(256 << 10); + +static void PowerSign(int32 n, Graph** init_g, Graph** train_g) { + TensorShape shape({n}); + { + Graph* g = new Graph(OpRegistry::Global()); + auto var = Var(g, n); + auto m = Var(g, n); + auto zero = Zeros(g, n); + test::graph::Assign(g, var, zero); + test::graph::Assign(g, m, zero); + *init_g = g; + } + { + Graph* g = new Graph(OpRegistry::Global()); + auto var = Var(g, n); + auto m = Var(g, n); + auto lr = Scalar(g, 0.01); + auto logbase = Scalar(g, 2); + auto sign_decay = Scalar(g, 0.9); + auto beta = Scalar(g, 0.8); + auto grad = Random(g, n); + test::graph::Multi(g, "ApplyPowerSign", + {var, m, lr, logbase, sign_decay, beta, grad}); + *train_g = g; + } +} + +static void BM_PowerSign(int iters, int params) { + const int64 tot = static_cast(iters) * params; + testing::ItemsProcessed(tot); + testing::BytesProcessed(tot * sizeof(float)); + Graph* init; + Graph* train; + PowerSign(params, &init, &train); + test::Benchmark("cpu", train, GetOptions(), init).Run(iters); +} +BENCHMARK(BM_PowerSign)->Arg(128 << 10)->Arg(256 << 10); + } // end namespace tensorflow diff --git a/tensorflow/core/ops/training_ops.cc 
b/tensorflow/core/ops/training_ops.cc index 6f06b87d58..405318caf2 100644 --- a/tensorflow/core/ops/training_ops.cc +++ b/tensorflow/core/ops/training_ops.cc @@ -22,6 +22,48 @@ using shape_inference::DimensionHandle; using shape_inference::InferenceContext; using shape_inference::ShapeHandle; +const char kAddSignCommonDocStr[] = R"doc( +Update '*var' according to the AddSign update. + +m_t <- beta1 * m_{t-1} + (1 - beta1) * g +update <- (alpha + sign_decay * sign(g) *sign(m)) * g +variable <- variable - lr_t * update + +var: Should be from a Variable(). +m: Should be from a Variable(). +lr: Scaling factor. Must be a scalar. +sign_decay: Must be a scalar. +alpha: Must be a scalar. +beta: Must be a scalar. +grad: The gradient. +)doc"; + +const char kPowerSignCommonDocStr[] = R"doc( +Update '*var' according to the AddSign update. + +m_t <- beta1 * m_{t-1} + (1 - beta1) * g +update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g +variable <- variable - lr_t * update + +var: Should be from a Variable(). +m: Should be from a Variable(). +lr: Scaling factor. Must be a scalar. +logbase: Must be a scalar. +sign_decay: Must be a scalar. +beta: Must be a scalar. +grad: The gradient. +)doc"; + +const char kOutDocStr[] = R"doc( +out: Same as "var". +)doc"; + +const char kLockDocStr[] = R"doc( +use_locking: If `True`, updating of the var and m tensors is + protected by a lock; otherwise the behavior is undefined, but may exhibit less + contention. +)doc"; + static ShapeHandle ShapeOrHandleShape(InferenceContext* c, int input) { auto* handle_data = c->input_handle_shapes_and_types(input); if (handle_data != nullptr && !handle_data->empty() && @@ -1796,4 +1838,99 @@ use_locking: If `True`, updating of the var, mg, ms, and mom tensors is contention. 
)doc"); +static Status ApplyAddSignShapeFn(InferenceContext* c, bool sparse) { + ShapeHandle unused; + ShapeHandle s = ShapeOrHandleShape(c, 0); // var + TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // m + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); // lr + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // alpha + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // sign_decay + TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); // beta + TF_RETURN_IF_ERROR( + HandleGradAndIndicesInputs(c, sparse, 6 /* grad_idx */, &s)); + if (c->num_outputs() > 0) { + c->set_output(0, s); + } + return Status::OK(); +} + +REGISTER_OP("ApplyAddSign") + .Input("var: Ref(T)") + .Input("m: Ref(T)") + .Input("lr: T") + .Input("alpha: T") + .Input("sign_decay: T") + .Input("beta: T") + .Input("grad: T") + .Output("out: Ref(T)") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyAddSignShapeFn(c, /*sparse=*/false); + }) + .Doc(strings::StrCat(kAddSignCommonDocStr, kOutDocStr, kLockDocStr)); + +REGISTER_OP("ResourceApplyAddSign") + .Input("var: resource") + .Input("m: resource") + .Input("lr: T") + .Input("alpha: T") + .Input("sign_decay: T") + .Input("beta: T") + .Input("grad: T") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyAddSignShapeFn(c, /*sparse=*/false); + }) + .Doc(strings::StrCat(kAddSignCommonDocStr, kLockDocStr)); + +static Status ApplyPowerSignShapeFn(InferenceContext* c, bool sparse) { + ShapeHandle unused; + ShapeHandle s = ShapeOrHandleShape(c, 0); // var + TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // m + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); // lr + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // logbase + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // sign_decay + TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, 
&unused)); // beta + TF_RETURN_IF_ERROR( + HandleGradAndIndicesInputs(c, sparse, 6 /* grad_idx */, &s)); + if (c->num_outputs() > 0) { + c->set_output(0, s); + } + return Status::OK(); +} + +REGISTER_OP("ApplyPowerSign") + .Input("var: Ref(T)") + .Input("m: Ref(T)") + .Input("lr: T") + .Input("logbase: T") + .Input("sign_decay: T") + .Input("beta: T") + .Input("grad: T") + .Output("out: Ref(T)") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyPowerSignShapeFn(c, /*sparse=*/false); + }) + .Doc(strings::StrCat(kPowerSignCommonDocStr, kOutDocStr, kLockDocStr)); + +REGISTER_OP("ResourceApplyPowerSign") + .Input("var: resource") + .Input("m: resource") + .Input("lr: T") + .Input("logbase: T") + .Input("sign_decay: T") + .Input("beta: T") + .Input("grad: T") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyPowerSignShapeFn(c, /*sparse=*/false); + }) + .Doc(strings::StrCat(kPowerSignCommonDocStr, kLockDocStr)); + + } // namespace tensorflow diff --git a/tensorflow/core/ops/training_ops_test.cc b/tensorflow/core/ops/training_ops_test.cc index 92d5ad9964..de4e3cd9e7 100644 --- a/tensorflow/core/ops/training_ops_test.cc +++ b/tensorflow/core/ops/training_ops_test.cc @@ -332,4 +332,38 @@ TEST(TrainingOpsTest, SparseApplyRMSProp_ShapeFn) { INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;?;?;[?];?;?"); } +TEST(TrainingOpsTest, ApplyAddSign_ShapeFn) { + ShapeInferenceTestOp op("ApplyAddSign"); + + // Output is a merge of inputs 0, 1, and 6 (var, m, and grad). + INFER_OK(op, "[1,?,?];[?,2,?];[];[];[];[];[?,?,2]", "[d0_0,d1_1,d6_2]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, + "[1];[2];[];[];[];[];[1]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, + "[1];[1];[];[];[];[];[2]"); + + // lr, alpha, sign_decay, and beta must be scalars. 
+ INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;[?];?;?;?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;[?];?;?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;[?];?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;?;[?];?"); +} + +TEST(TrainingOpsTest, ApplyPowerSign_ShapeFn) { + ShapeInferenceTestOp op("ApplyPowerSign"); + + // Output is a merge of inputs 0, 1, and 6 (var, m, and grad). + INFER_OK(op, "[1,?,?];[?,2,?];[];[];[];[];[?,?,2]", "[d0_0,d1_1,d6_2]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, + "[1];[2];[];[];[];[];[1]"); + INFER_ERROR("Dimension 0 in both shapes must be equal, but are 1 and 2", op, + "[1];[1];[];[];[];[];[2]"); + + // lr, logbase, sign_decay, and beta must be scalars. + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;[?];?;?;?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;[?];?;?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;[?];?;?"); + INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;?;?;?;?;[?];?"); +} + } // end namespace tensorflow -- GitLab From aab5a41eb139812a50a728a9e888bb0290c4c95e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 12:20:44 -0800 Subject: [PATCH 0059/1225] Update ops-related pbtxt files. 
PiperOrigin-RevId: 176143870 --- .../core/ops/compat/ops_history.v1.pbtxt | 264 +++++++++++++++ tensorflow/core/ops/ops.pbtxt | 306 ++++++++++++++++++ 2 files changed, 570 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index daeb6763c8..c7a296d938 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -1536,6 +1536,75 @@ op { } } } +op { + name: "ApplyAddSign" + input_arg { + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "m" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "alpha" + type_attr: "T" + } + input_arg { + name: "sign_decay" + type_attr: "T" + } + input_arg { + name: "beta" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + output_arg { + name: "out" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} op { name: "ApplyCenteredRMSProp" input_arg { @@ -2228,6 +2297,75 @@ op { } } } +op { + name: "ApplyPowerSign" + input_arg { + name: "var" + type_attr: "T" + is_ref: true + } + input_arg { + name: "m" + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "logbase" + type_attr: "T" + } + input_arg { + name: "sign_decay" + type_attr: "T" + } + input_arg { + name: "beta" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + output_arg { + name: "out" + type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + 
allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } +} op { name: "ApplyProximalAdagrad" input_arg { @@ -26847,6 +26985,69 @@ op { } is_stateful: true } +op { + name: "ResourceApplyAddSign" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "m" + type: DT_RESOURCE + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "alpha" + type_attr: "T" + } + input_arg { + name: "sign_decay" + type_attr: "T" + } + input_arg { + name: "beta" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + is_stateful: true +} op { name: "ResourceApplyCenteredRMSProp" input_arg { @@ -27473,6 +27674,69 @@ op { } is_stateful: true } +op { + name: "ResourceApplyPowerSign" + input_arg { + name: "var" + type: DT_RESOURCE + } + input_arg { + name: "m" + type: DT_RESOURCE + } + input_arg { + name: "lr" + type_attr: "T" + } + input_arg { + name: "logbase" + type_attr: "T" + } + input_arg { + name: "sign_decay" + type_attr: "T" + } + input_arg { + name: "beta" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: 
DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + } + is_stateful: true +} op { name: "ResourceApplyProximalAdagrad" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 55a8fc9032..d043696a94 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -1103,6 +1103,86 @@ op { summary: "Update \'*var\' according to the Adam algorithm." description: "lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\nm_t <- beta1 * m_{t-1} + (1 - beta1) * g_t\nv_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t\nvariable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)" } +op { + name: "ApplyAddSign" + input_arg { + name: "var" + description: "Should be from a Variable()." + type_attr: "T" + is_ref: true + } + input_arg { + name: "m" + description: "Should be from a Variable()." + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + description: "Scaling factor. Must be a scalar." + type_attr: "T" + } + input_arg { + name: "alpha" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "sign_decay" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "beta" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "grad" + description: "The gradient." + type_attr: "T" + } + output_arg { + name: "out" + description: "Same as \"var\"." 
+ type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention." + } + summary: "Update \'*var\' according to the AddSign update." + description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- (alpha + sign_decay * sign(g) *sign(m)) * g\nvariable <- variable - lr_t * update" +} op { name: "ApplyCenteredRMSProp" input_arg { @@ -1506,6 +1586,86 @@ op { summary: "Update \'*var\' according to the momentum scheme. Set use_nesterov = True if you" description: "want to use Nesterov momentum.\n\naccum = accum * momentum + grad\nvar -= lr * accum" } +op { + name: "ApplyPowerSign" + input_arg { + name: "var" + description: "Should be from a Variable()." + type_attr: "T" + is_ref: true + } + input_arg { + name: "m" + description: "Should be from a Variable()." + type_attr: "T" + is_ref: true + } + input_arg { + name: "lr" + description: "Scaling factor. Must be a scalar." + type_attr: "T" + } + input_arg { + name: "logbase" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "sign_decay" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "beta" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "grad" + description: "The gradient." + type_attr: "T" + } + output_arg { + name: "out" + description: "Same as \"var\"." 
+ type_attr: "T" + is_ref: true + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention." + } + summary: "Update \'*var\' according to the AddSign update." + description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g\nvariable <- variable - lr_t * update" +} op { name: "ApplyProximalAdagrad" input_arg { @@ -21774,6 +21934,79 @@ op { description: "lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)\nm_t <- beta1 * m_{t-1} + (1 - beta1) * g_t\nv_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t\nvariable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)" is_stateful: true } +op { + name: "ResourceApplyAddSign" + input_arg { + name: "var" + description: "Should be from a Variable()." + type: DT_RESOURCE + } + input_arg { + name: "m" + description: "Should be from a Variable()." + type: DT_RESOURCE + } + input_arg { + name: "lr" + description: "Scaling factor. Must be a scalar." + type_attr: "T" + } + input_arg { + name: "alpha" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "sign_decay" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "beta" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "grad" + description: "The gradient." 
+ type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention." + } + summary: "Update \'*var\' according to the AddSign update." + description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- (alpha + sign_decay * sign(g) *sign(m)) * g\nvariable <- variable - lr_t * update" + is_stateful: true +} op { name: "ResourceApplyCenteredRMSProp" input_arg { @@ -22139,6 +22372,79 @@ op { description: "want to use Nesterov momentum.\n\naccum = accum * momentum + grad\nvar -= lr * accum" is_stateful: true } +op { + name: "ResourceApplyPowerSign" + input_arg { + name: "var" + description: "Should be from a Variable()." + type: DT_RESOURCE + } + input_arg { + name: "m" + description: "Should be from a Variable()." + type: DT_RESOURCE + } + input_arg { + name: "lr" + description: "Scaling factor. Must be a scalar." + type_attr: "T" + } + input_arg { + name: "logbase" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "sign_decay" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "beta" + description: "Must be a scalar." + type_attr: "T" + } + input_arg { + name: "grad" + description: "The gradient." 
+ type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT64 + type: DT_INT32 + type: DT_UINT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "use_locking" + type: "bool" + default_value { + b: false + } + description: "If `True`, updating of the var and m tensors is\nprotected by a lock; otherwise the behavior is undefined, but may exhibit less\ncontention." + } + summary: "Update \'*var\' according to the AddSign update." + description: "m_t <- beta1 * m_{t-1} + (1 - beta1) * g\nupdate <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g\nvariable <- variable - lr_t * update" + is_stateful: true +} op { name: "ResourceApplyProximalAdagrad" input_arg { -- GitLab From d32150d14f1651e20bafa07e6f1b51a32fd75999 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 12:30:54 -0800 Subject: [PATCH 0060/1225] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 176145113 --- tensorflow/go/op/wrappers.go | 2276 ++++++++++++++++++---------------- 1 file changed, 1187 insertions(+), 1089 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 1d1383ec82..664e37d3a1 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -2681,21 +2681,6 @@ func Abort(scope *Scope, optional ...AbortAttr) (o *tf.Operation) { return scope.AddOperation(opspec) } -// Does nothing. Serves as a control trigger for scheduling. -// -// Only useful as a placeholder for control edges. -// -// Returns the created operation. 
-func ControlTrigger(scope *Scope) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ControlTrigger", - } - return scope.AddOperation(opspec) -} - // SpaceToDepthAttr is an optional argument to SpaceToDepth. type SpaceToDepthAttr func(optionalAttr) @@ -8123,88 +8108,82 @@ func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3. -type QuantizeAndDequantizeV3Attr func(optionalAttr) - -// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr { - return func(m optionalAttr) { - m["range_given"] = value - } -} - -// Quantizes then dequantizes a tensor. +// Merges summaries. // -// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a -// tensor, so its value can change during training. -func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) { +// This op creates a +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// protocol buffer that contains the union of all the values in the input +// summaries. +// +// When the Op is run, it reports an `InvalidArgument` error if multiple values +// in the summaries to merge use the same tag. +// +// Arguments: +// inputs: Can be of any shape. Each must contain serialized `Summary` protocol +// buffers. +// +// Returns Scalar. 
Serialized `Summary` protocol buffer. +func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "QuantizeAndDequantizeV3", + Type: "MergeSummary", Input: []tf.Input{ - input, input_min, input_max, num_bits, + tf.OutputList(inputs), }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// AvgPool3DAttr is an optional argument to AvgPool3D. -type AvgPool3DAttr func(optionalAttr) +// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. +type AudioSummaryV2Attr func(optionalAttr) -// AvgPool3DDataFormat sets the optional data_format attribute to value. +// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func AvgPool3DDataFormat(value string) AvgPool3DAttr { +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { return func(m optionalAttr) { - m["data_format"] = value + m["max_outputs"] = value } } -// Performs 3D average pooling on the input. +// Outputs a `Summary` protocol buffer with audio. +// +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. 
+// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. // // Arguments: -// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. // -// Returns The average pooled output tensor. -func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) { +// Returns Scalar. Serialized `Summary` protocol buffer. +func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AvgPool3D", + Type: "AudioSummaryV2", Input: []tf.Input{ - input, + tag, tensor, sample_rate, }, Attrs: attrs, } @@ -8212,35 +8191,6 @@ func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, pa return op.Output(0) } -// Produces the max pool of the input tensor for quantized types. -// -// Arguments: -// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. 
-// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// ksize: The size of the window for each dimension of the input tensor. -// The length must be 4 to match the number of dimensions of the input. -// strides: The stride of the sliding window for each dimension of the input -// tensor. The length must be 4 to match the number of dimensions of the input. -// padding: The type of padding algorithm to use. -// -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "QuantizedMaxPool", - Input: []tf.Input{ - input, min_input, max_input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2. type Conv3DBackpropInputV2Attr func(optionalAttr) @@ -8725,32 +8675,6 @@ func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad return op.Output(0) } -// Adds `bias` to `value`. -// -// This is a deprecated version of BiasAdd and will be soon removed. -// -// This is a special case of `tf.add` where `bias` is restricted to be 1-D. -// Broadcasting is supported, so `value` may have any number of dimensions. -// -// Arguments: -// value: Any number of dimensions. -// bias: 1-D with size the last dimension of `value`. -// -// Returns Broadcasted sum of `value` and `bias`. 
-func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BiasAddV1", - Input: []tf.Input{ - value, bias, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // EncodeJpegAttr is an optional argument to EncodeJpeg. type EncodeJpegAttr func(optionalAttr) @@ -13128,41 +13052,264 @@ func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Outp return op.Output(0) } -// Batch normalization. +// WriteImageSummaryAttr is an optional argument to WriteImageSummary. +type WriteImageSummaryAttr func(optionalAttr) + +// WriteImageSummaryMaxImages sets the optional max_images attribute to value. // -// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() +// value: Max number of batch elements to generate images for. +// If not specified, defaults to 3 // -// This op is deprecated. Prefer `tf.nn.batch_normalization`. +// REQUIRES: value >= 1 +func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr { + return func(m optionalAttr) { + m["max_images"] = value + } +} + +// Writes a `Summary` protocol buffer with images. +// +// The summary has up to `max_images` summary values containing images. The +// images are built from `tensor` which must be 4-D with shape `[batch_size, +// height, width, channels]` and where `channels` can be: +// +// * 1: `tensor` is interpreted as Grayscale. +// * 3: `tensor` is interpreted as RGB. +// * 4: `tensor` is interpreted as RGBA. +// +// The images have the same number of channels as the input tensor. For float +// input, the values are normalized one image at a time to fit in the range +// `[0, 255]`. `uint8` values are unchanged. The op uses two different +// normalization algorithms: +// +// * If the input values are all positive, they are rescaled so the largest one +// is 255. +// +// * If any input value is negative, the values are shifted so input value 0.0 +// is at 127. 
They are then rescaled so that either the smallest value is 0, +// or the largest one is 255. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_images` is 1, the summary value tag is '*tag*/image'. +// * If `max_images` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. +// +// The `bad_color` argument is the color to use in the generated images for +// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +// Each element must be in the range `[0, 255]` (It represents the value of a +// pixel in the output image). Non-finite values in the input tensor are +// replaced by this tensor in the output image. The default value is the color +// red. // // Arguments: -// t: A 4D input Tensor. -// m: A 1D mean Tensor with size matching the last dimension of t. -// This is the first output from tf.nn.moments, -// or a saved moving average thereof. -// v: A 1D variance Tensor with size matching the last dimension of t. -// This is the second output from tf.nn.moments, -// or a saved moving average thereof. -// beta: A 1D beta Tensor with size matching the last dimension of t. -// An offset to be added to the normalized tensor. -// gamma: A 1D gamma Tensor with size matching the last dimension of t. -// If "scale_after_normalization" is true, this tensor will be multiplied -// with the normalized tensor. -// variance_epsilon: A small float number to avoid dividing by 0. -// scale_after_normalization: A bool indicating whether the resulted tensor -// needs to be multiplied with gamma. -func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { +// writer: A handle to a summary writer. +// step: The step to write the summary for. +// tag: Scalar. 
Used to build the `tag` attribute of the summary values. +// tensor: 4-D of shape `[batch_size, height, width, channels]` where +// `channels` is 1, 3, or 4. +// bad_color: Color to use for pixels with non-finite values. +// +// Returns the created operation. +func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BatchNormWithGlobalNormalization", + Type: "WriteImageSummary", Input: []tf.Input{ - t, m, v, beta, gamma, + writer, step, tag, tensor, bad_color, }, Attrs: attrs, } - op := scope.AddOperation(opspec) + return scope.AddOperation(opspec) +} + +// Pads a tensor with zeros. +// +// This operation pads a `input` with zeros according to the `paddings` you +// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the +// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +// how many zeros to add before the contents of `input` in that dimension, and +// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` +// in that dimension. 
+// +// The padded size of each dimension D of the output is: +// +// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` +// +// For example: +// +// ``` +// # 't' is [[1, 1], [2, 2]] +// # 'paddings' is [[1, 1], [2, 2]] +// # rank of 't' is 2 +// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] +// [0, 0, 1, 1, 0, 0] +// [0, 0, 2, 2, 0, 0] +// [0, 0, 0, 0, 0, 0]] +// ``` +func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Pad", + Input: []tf.Input{ + input, paddings, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the number of elements in the given queue. +// +// Arguments: +// handle: The handle to a queue. +// +// Returns The number of elements in the given queue. +func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "QueueSizeV2", + Input: []tf.Input{ + handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs a `Summary` protocol buffer with a histogram. +// +// The generated +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// has one summary value containing a histogram for `values`. +// +// This op reports an `InvalidArgument` error if any value is not finite. +// +// Arguments: +// tag: Scalar. Tag to use for the `Summary.Value`. +// values: Any shape. Values to use to build the histogram. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "HistogramSummary", + Input: []tf.Input{ + tag, values, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that emits the lines of one or more text files. 
+// +// Arguments: +// filenames: A scalar or a vector containing the name(s) of the file(s) to be +// read. +// compression_type: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// buffer_size: A scalar containing the number of bytes to buffer. +func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TextLineDataset", + Input: []tf.Input{ + filenames, compression_type, buffer_size, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the number of records this Reader has produced. +// +// This is the same as the number of ReaderRead executions that have +// succeeded. +// +// Arguments: +// reader_handle: Handle to a Reader. +func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderNumRecordsProducedV2", + Input: []tf.Input{ + reader_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes exponential of x - 1 element-wise. +// +// I.e., \\(y = (\exp x) - 1\\). +func Expm1(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Expm1", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Batch normalization. +// +// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() +// +// This op is deprecated. Prefer `tf.nn.batch_normalization`. +// +// Arguments: +// t: A 4D input Tensor. +// m: A 1D mean Tensor with size matching the last dimension of t. +// This is the first output from tf.nn.moments, +// or a saved moving average thereof. +// v: A 1D variance Tensor with size matching the last dimension of t. 
+// This is the second output from tf.nn.moments, +// or a saved moving average thereof. +// beta: A 1D beta Tensor with size matching the last dimension of t. +// An offset to be added to the normalized tensor. +// gamma: A 1D gamma Tensor with size matching the last dimension of t. +// If "scale_after_normalization" is true, this tensor will be multiplied +// with the normalized tensor. +// variance_epsilon: A small float number to avoid dividing by 0. +// scale_after_normalization: A bool indicating whether the resulted tensor +// needs to be multiplied with gamma. +func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} + opspec := tf.OpSpec{ + Type: "BatchNormWithGlobalNormalization", + Input: []tf.Input{ + t, m, v, beta, gamma, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) return op.Output(0) } @@ -13284,35 +13431,6 @@ func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataTyp return key, values } -// Merges summaries. -// -// This op creates a -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// protocol buffer that contains the union of all the values in the input -// summaries. -// -// When the Op is run, it reports an `InvalidArgument` error if multiple values -// in the summaries to merge use the same tag. -// -// Arguments: -// inputs: Can be of any shape. Each must contain serialized `Summary` protocol -// buffers. -// -// Returns Scalar. Serialized `Summary` protocol buffer. 
-func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MergeSummary", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Read an element from the TensorArray into output `value`. // // Arguments: @@ -14006,56 +14124,120 @@ func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Ou return op.Output(0) } -// Reorders a SparseTensor into the canonical, row-major ordering. -// -// Note that by convention, all sparse ops preserve the canonical ordering along -// increasing dimension number. The only time ordering can be violated is during -// manual manipulation of the indices and values vectors to add entries. -// -// Reordering does not affect the shape of the SparseTensor. -// -// If the tensor has rank `R` and `N` non-empty values, `input_indices` has -// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`. +// Does nothing. Serves as a control trigger for scheduling. // -// Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. +// Only useful as a placeholder for control edges. // -// Returns 2-D. `N x R` matrix with the same indices as input_indices, but -// in canonical row-major ordering.1-D. `N` non-empty values corresponding to `output_indices`. -func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { +// Returns the created operation. 
+func ControlTrigger(scope *Scope) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseReorder", - Input: []tf.Input{ - input_indices, input_values, input_shape, - }, + Type: "ControlTrigger", } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return scope.AddOperation(opspec) } -// PackAttr is an optional argument to Pack. -type PackAttr func(optionalAttr) +// ResourceApplyAddSignAttr is an optional argument to ResourceApplyAddSign. +type ResourceApplyAddSignAttr func(optionalAttr) -// PackAxis sets the optional axis attribute to value. +// ResourceApplyAddSignUseLocking sets the optional use_locking attribute to value. // -// value: Dimension along which to pack. Negative values wrap around, so the -// valid range is `[-(R+1), R+1)`. -// If not specified, defaults to 0 -func PackAxis(value int64) PackAttr { +// value: If `True`, updating of the var and m tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAddSignUseLocking(value bool) ResourceApplyAddSignAttr { return func(m optionalAttr) { - m["axis"] = value + m["use_locking"] = value } } -// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor. +// Update '*var' according to the AddSign update. // -// Packs the `N` tensors in `values` into a tensor with rank one higher than each +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// update <- (alpha + sign_decay * sign(g) *sign(m)) * g +// variable <- variable - lr_t * update +// +// Arguments: +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// alpha: Must be a scalar. +// sign_decay: Must be a scalar. +// beta: Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. 
+func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, alpha tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyAddSignAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyAddSign", + Input: []tf.Input{ + var_, m, lr, alpha, sign_decay, beta, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Reorders a SparseTensor into the canonical, row-major ordering. +// +// Note that by convention, all sparse ops preserve the canonical ordering along +// increasing dimension number. The only time ordering can be violated is during +// manual manipulation of the indices and values vectors to add entries. +// +// Reordering does not affect the shape of the SparseTensor. +// +// If the tensor has rank `R` and `N` non-empty values, `input_indices` has +// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`. +// +// Arguments: +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. Shape of the input SparseTensor. +// +// Returns 2-D. `N x R` matrix with the same indices as input_indices, but +// in canonical row-major ordering.1-D. `N` non-empty values corresponding to `output_indices`. +func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseReorder", + Input: []tf.Input{ + input_indices, input_values, input_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// PackAttr is an optional argument to Pack. 
+type PackAttr func(optionalAttr) + +// PackAxis sets the optional axis attribute to value. +// +// value: Dimension along which to pack. Negative values wrap around, so the +// valid range is `[-(R+1), R+1)`. +// If not specified, defaults to 0 +func PackAxis(value int64) PackAttr { + return func(m optionalAttr) { + m["axis"] = value + } +} + +// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor. +// +// Packs the `N` tensors in `values` into a tensor with rank one higher than each // tensor in `values`, by packing them along the `axis` dimension. // Given a list of tensors of shape `(A, B, C)`; // @@ -14151,6 +14333,133 @@ func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max return op.Output(0), op.Output(1), op.Output(2) } +// Records the bytes size of each element of `input_dataset` in a StatsAggregator. +func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "BytesProducedStatsDataset", + Input: []tf.Input{ + input_dataset, tag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// QrAttr is an optional argument to Qr. +type QrAttr func(optionalAttr) + +// QrFullMatrices sets the optional full_matrices attribute to value. +// +// value: If true, compute full-sized `q` and `r`. If false +// (the default), compute only the leading `P` columns of `q`. +// If not specified, defaults to false +func QrFullMatrices(value bool) QrAttr { + return func(m optionalAttr) { + m["full_matrices"] = value + } +} + +// Computes the QR decompositions of one or more matrices. 
+// +// Computes the QR decomposition of each inner matrix in `tensor` such that +// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])` +// +// ```python +// # a is a tensor. +// # q is a tensor of orthonormal matrices. +// # r is a tensor of upper triangular matrices. +// q, r = qr(a) +// q_full, r_full = qr(a, full_matrices=True) +// ``` +// +// Arguments: +// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. +// +// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then +// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is +// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is +// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`. +func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Qr", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// AudioSummaryAttr is an optional argument to AudioSummary. +type AudioSummaryAttr func(optionalAttr) + +// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value. +// +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr { + return func(m optionalAttr) { + m["max_outputs"] = value + } +} + +// Outputs a `Summary` protocol buffer with audio. +// +// DEPRECATED at GraphDef version 15: Use AudioSummaryV2. +// +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. 
The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// +// Arguments: +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"sample_rate": sample_rate} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AudioSummary", + Input: []tf.Input{ + tag, tensor, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Reverses specific dimensions of a tensor. // // NOTE `tf.reverse` has now changed behavior in preparation for 1.0. @@ -14671,6 +14980,24 @@ func SoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.O return op.Output(0), op.Output(1) } +// Returns x - y element-wise. +// +// *NOTE*: `Sub` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Sub", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Get the value of the tensor specified by its handle. 
// // Arguments: @@ -17551,69 +17878,21 @@ func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) return op.Output(0) } -// AvgPool3DGradAttr is an optional argument to AvgPool3DGrad. -type AvgPool3DGradAttr func(optionalAttr) - -// AvgPool3DGradDataFormat sets the optional data_format attribute to value. +// Inverse fast Fourier transform. // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func AvgPool3DGradDataFormat(value string) AvgPool3DGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of average pooling function. +// Computes the inverse 1-dimensional discrete Fourier transform over the +// inner-most dimension of `input`. // // Arguments: -// orig_input_shape: The original input dimensions. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. +// input: A complex64 tensor. // -// Returns The backprop for input. 
-func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AvgPool3DGrad", - Input: []tf.Input{ - orig_input_shape, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Inverse fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform over the -// inner-most dimension of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its inverse 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifft -// @end_compatibility -func IFFT(scope *Scope, input tf.Output) (output tf.Output) { +// Returns A complex64 tensor of the same shape as `input`. The inner-most +// dimension of `input` is replaced with its inverse 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifft +// @end_compatibility +func IFFT(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } @@ -17850,6 +18129,55 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) return op.Output(0) } +// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign. +type ResourceApplyPowerSignAttr func(optionalAttr) + +// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and m tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. 
+// If not specified, defaults to false +func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the AddSign update. +// +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g +// variable <- variable - lr_t * update +// +// Arguments: +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// logbase: Must be a scalar. +// sign_decay: Must be a scalar. +// beta: Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. +func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyPowerSign", + Input: []tf.Input{ + var_, m, lr, logbase, sign_decay, beta, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // DestroyResourceOpAttr is an optional argument to DestroyResourceOp. type DestroyResourceOpAttr func(optionalAttr) @@ -17988,6 +18316,32 @@ func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Outpu return op.Output(0), op.Output(1) } +// Adds `bias` to `value`. +// +// This is a deprecated version of BiasAdd and will be soon removed. +// +// This is a special case of `tf.add` where `bias` is restricted to be 1-D. +// Broadcasting is supported, so `value` may have any number of dimensions. +// +// Arguments: +// value: Any number of dimensions. +// bias: 1-D with size the last dimension of `value`. +// +// Returns Broadcasted sum of `value` and `bias`. 
+func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BiasAddV1", + Input: []tf.Input{ + value, bias, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2. type FixedLengthRecordReaderV2Attr func(optionalAttr) @@ -18195,169 +18549,6 @@ func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) ( return op.Output(0) } -// WriteImageSummaryAttr is an optional argument to WriteImageSummary. -type WriteImageSummaryAttr func(optionalAttr) - -// WriteImageSummaryMaxImages sets the optional max_images attribute to value. -// -// value: Max number of batch elements to generate images for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr { - return func(m optionalAttr) { - m["max_images"] = value - } -} - -// Writes a `Summary` protocol buffer with images. -// -// The summary has up to `max_images` summary values containing images. The -// images are built from `tensor` which must be 4-D with shape `[batch_size, -// height, width, channels]` and where `channels` can be: -// -// * 1: `tensor` is interpreted as Grayscale. -// * 3: `tensor` is interpreted as RGB. -// * 4: `tensor` is interpreted as RGBA. -// -// The images have the same number of channels as the input tensor. For float -// input, the values are normalized one image at a time to fit in the range -// `[0, 255]`. `uint8` values are unchanged. The op uses two different -// normalization algorithms: -// -// * If the input values are all positive, they are rescaled so the largest one -// is 255. -// -// * If any input value is negative, the values are shifted so input value 0.0 -// is at 127. They are then rescaled so that either the smallest value is 0, -// or the largest one is 255. 
-// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_images` is 1, the summary value tag is '*tag*/image'. -// * If `max_images` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. -// -// The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. -// Each element must be in the range `[0, 255]` (It represents the value of a -// pixel in the output image). Non-finite values in the input tensor are -// replaced by this tensor in the output image. The default value is the color -// red. -// -// Arguments: -// writer: A handle to a summary writer. -// step: The step to write the summary for. -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 4-D of shape `[batch_size, height, width, channels]` where -// `channels` is 1, 3, or 4. -// bad_color: Color to use for pixels with non-finite values. -// -// Returns the created operation. -func WriteImageSummary(scope *Scope, writer tf.Output, step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "WriteImageSummary", - Input: []tf.Input{ - writer, step, tag, tensor, bad_color, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Pads a tensor with zeros. -// -// This operation pads a `input` with zeros according to the `paddings` you -// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the -// rank of `input`. 
For each dimension D of `input`, `paddings[D, 0]` indicates -// how many zeros to add before the contents of `input` in that dimension, and -// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` -// in that dimension. -// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 1], [2, 2]] -// # 'paddings' is [[1, 1], [2, 2]] -// # rank of 't' is 2 -// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] -// [0, 0, 1, 1, 0, 0] -// [0, 0, 2, 2, 0, 0] -// [0, 0, 0, 0, 0, 0]] -// ``` -func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Pad", - Input: []tf.Input{ - input, paddings, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the number of elements in the given queue. -// -// Arguments: -// handle: The handle to a queue. -// -// Returns The number of elements in the given queue. -func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "QueueSizeV2", - Input: []tf.Input{ - handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs a `Summary` protocol buffer with a histogram. -// -// The generated -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// has one summary value containing a histogram for `values`. -// -// This op reports an `InvalidArgument` error if any value is not finite. -// -// Arguments: -// tag: Scalar. Tag to use for the `Summary.Value`. -// values: Any shape. Values to use to build the histogram. -// -// Returns Scalar. Serialized `Summary` protocol buffer. 
-func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "HistogramSummary", - Input: []tf.Input{ - tag, values, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // AsStringAttr is an optional argument to AsString. type AsStringAttr func(optionalAttr) @@ -18989,7 +19180,158 @@ func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, option return op.Output(0) } -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. +type UniqueWithCountsAttr func(optionalAttr) + +// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. +// If not specified, defaults to DT_INT32 +func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { + return func(m optionalAttr) { + m["out_idx"] = value + } +} + +// Finds unique elements in a 1-D tensor. +// +// This operation returns a tensor `y` containing all of the unique elements of `x` +// sorted in the same order that they occur in `x`. This operation also returns a +// tensor `idx` the same size as `x` that contains the index of each value of `x` +// in the unique output `y`. Finally, it returns a third tensor `count` that +// contains the count of each element of `y` in `x`. In other words: +// +// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` +// +// For example: +// +// ``` +// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +// y, idx, count = unique_with_counts(x) +// y ==> [1, 2, 4, 7, 8] +// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// count ==> [2, 1, 3, 1, 2] +// ``` +// +// Arguments: +// x: 1-D. +// +// Returns 1-D.1-D.1-D. 
+func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "UniqueWithCounts", + Input: []tf.Input{ + x, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// RestoreSliceAttr is an optional argument to RestoreSlice. +type RestoreSliceAttr func(optionalAttr) + +// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. +// +// value: Index of file to open first if multiple files match +// `file_pattern`. See the documentation for `Restore`. +// If not specified, defaults to -1 +func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { + return func(m optionalAttr) { + m["preferred_shard"] = value + } +} + +// Restores a tensor from checkpoint files. +// +// This is like `Restore` except that restored tensor can be listed as filling +// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the +// larger tensor and the slice that the restored tensor covers. +// +// The `shape_and_slice` input has the same format as the +// elements of the `shapes_and_slices` input of the `SaveSlices` op. +// +// Arguments: +// file_pattern: Must have a single element. The pattern of the files from +// which we read the tensor. +// tensor_name: Must have a single element. The name of the tensor to be +// restored. +// shape_and_slice: Scalar. The shapes and slice specifications to use when +// restoring a tensors. +// dt: The type of the tensor to be restored. +// +// Returns The restored tensor. 
+func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dt": dt} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RestoreSlice", + Input: []tf.Input{ + file_pattern, tensor_name, shape_and_slice, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. +type StatelessTruncatedNormalAttr func(optionalAttr) + +// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. +// +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs deterministic pseudorandom values from a truncated normal distribution. +// +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. +// +// The outputs are a deterministic function of `shape` and `seed`. +// +// Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// +// Returns Random values with specified shape. +func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatelessTruncatedNormal", + Input: []tf.Input{ + shape, seed, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the sum along sparse segments of a tensor divided by the sqrt of N. 
// // N is the size of the segment being reduced. // @@ -19891,92 +20233,205 @@ func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { return op.Output(0) } -// Inverse real-valued fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most dimension of `input`. -// -// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the -// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If -// `fft_length` is not provided, it is computed from the size of the inner-most -// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to -// compute `input` is odd, it should be provided since it cannot be inferred -// properly. -// -// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller -// than the corresponding dimension of `input`, the dimension is cropped. If it is -// larger, the dimension is padded with zeros. +// Creates a dataset that skips `count` elements from the `input_dataset`. // // Arguments: -// input: A complex64 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. // -// Returns A float32 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length` samples of its inverse -// 1D Fourier transform. +// count: A scalar representing the number of elements from the `input_dataset` +// that should be skipped. If count is -1, skips everything. 
// -// @compatibility(numpy) -// Equivalent to np.fft.irfft -// @end_compatibility -func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// +func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "IRFFT", + Type: "SkipDataset", Input: []tf.Input{ - input, fft_length, + input_dataset, count, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Adds a value to the current value of a variable. -// -// Any ReadVariableOp which depends directly or indirectly on this assign is -// guaranteed to see the incremented value or a subsequent newer one. +// ImagAttr is an optional argument to Imag. +type ImagAttr func(optionalAttr) + +// ImagTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func ImagTout(value tf.DataType) ImagAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Returns the imaginary part of a complex number. // -// Outputs the incremented value, which can be used to totally order the -// increments to this variable. +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the imaginary part of each element in `input`. All +// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* +// is the real part and *b* is the imaginary part returned by this operation. // -// Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value by which the variable will be incremented. +// For example: // -// Returns the created operation. 
-func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.imag(input) ==> [4.75, 5.75] +// ``` +func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "AssignAddVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} - -// Computes inverse hyperbolic sine of x element-wise. -func Asinh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) } opspec := tf.OpSpec{ - Type: "Asinh", + Type: "Imag", Input: []tf.Input{ - x, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Real-valued fast Fourier transform. +// ComplexAttr is an optional argument to Complex. +type ComplexAttr func(optionalAttr) + +// ComplexTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_COMPLEX64 +func ComplexTout(value tf.DataType) ComplexAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Converts two real numbers to a complex number. // -// Computes the 1-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most dimension of `input`. +// Given a tensor `real` representing the real part of a complex number, and a +// tensor `imag` representing the imaginary part of a complex number, this +// operation returns complex numbers elementwise of the form \\(a + bj\\), where +// *a* represents the `real` part and *b* represents the `imag` part. +// +// The input tensors `real` and `imag` must have the same shape. 
+// +// For example: +// +// ``` +// # tensor 'real' is [2.25, 3.25] +// # tensor `imag` is [4.75, 5.75] +// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] +// ``` +func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Complex", + Input: []tf.Input{ + real, imag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Inverse real-valued fast Fourier transform. +// +// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most dimension of `input`. +// +// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the +// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If +// `fft_length` is not provided, it is computed from the size of the inner-most +// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to +// compute `input` is odd, it should be provided since it cannot be inferred +// properly. +// +// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller +// than the corresponding dimension of `input`, the dimension is cropped. If it is +// larger, the dimension is padded with zeros. +// +// Arguments: +// input: A complex64 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. +// +// Returns A float32 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length` samples of its inverse +// 1D Fourier transform. 
+// +// @compatibility(numpy) +// Equivalent to np.fft.irfft +// @end_compatibility +func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IRFFT", + Input: []tf.Input{ + input, fft_length, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Adds a value to the current value of a variable. +// +// Any ReadVariableOp which depends directly or indirectly on this assign is +// guaranteed to see the incremented value or a subsequent newer one. +// +// Outputs the incremented value, which can be used to totally order the +// increments to this variable. +// +// Arguments: +// resource: handle to the resource in which to store the variable. +// value: the value by which the variable will be incremented. +// +// Returns the created operation. +func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "AssignAddVariableOp", + Input: []tf.Input{ + resource, value, + }, + } + return scope.AddOperation(opspec) +} + +// Computes inverse hyperbolic sine of x element-wise. +func Asinh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Asinh", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Real-valued fast Fourier transform. +// +// Computes the 1-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most dimension of `input`. // // Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the // `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, @@ -20311,85 +20766,30 @@ func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Outp return op.Output(0) } -// RestoreSliceAttr is an optional argument to RestoreSlice. 
-type RestoreSliceAttr func(optionalAttr) +// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3. +type QuantizeAndDequantizeV3Attr func(optionalAttr) -// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. -// -// value: Index of file to open first if multiple files match -// `file_pattern`. See the documentation for `Restore`. -// If not specified, defaults to -1 -func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { +// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value. +// If not specified, defaults to true +func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr { return func(m optionalAttr) { - m["preferred_shard"] = value + m["signed_input"] = value } } -// Restores a tensor from checkpoint files. -// -// This is like `Restore` except that restored tensor can be listed as filling -// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the -// larger tensor and the slice that the restored tensor covers. -// -// The `shape_and_slice` input has the same format as the -// elements of the `shapes_and_slices` input of the `SaveSlices` op. -// -// Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// shape_and_slice: Scalar. The shapes and slice specifications to use when -// restoring a tensors. -// dt: The type of the tensor to be restored. -// -// Returns The restored tensor. 
-func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dt": dt} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RestoreSlice", - Input: []tf.Input{ - file_pattern, tensor_name, shape_and_slice, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. -type StatelessTruncatedNormalAttr func(optionalAttr) - -// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { +// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value. +// If not specified, defaults to true +func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr { return func(m optionalAttr) { - m["dtype"] = value + m["range_given"] = value } } -// Outputs deterministic pseudorandom values from a truncated normal distribution. -// -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. -// -// The outputs are a deterministic function of `shape` and `seed`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// Quantizes then dequantizes a tensor. // -// Returns Random values with specified shape. 
-func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { +// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a +// tensor, so its value can change during training. +func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) { if scope.Err() != nil { return } @@ -20398,9 +20798,9 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt a(attrs) } opspec := tf.OpSpec{ - Type: "StatelessTruncatedNormal", + Type: "QuantizeAndDequantizeV3", Input: []tf.Input{ - shape, seed, + input, input_min, input_max, num_bits, }, Attrs: attrs, } @@ -20408,246 +20808,125 @@ func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, opt return op.Output(0) } -// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. -type UniqueWithCountsAttr func(optionalAttr) +// AvgPool3DAttr is an optional argument to AvgPool3D. +type AvgPool3DAttr func(optionalAttr) -// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { +// AvgPool3DDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func AvgPool3DDataFormat(value string) AvgPool3DAttr { return func(m optionalAttr) { - m["out_idx"] = value + m["data_format"] = value } } -// Finds unique elements in a 1-D tensor. 
-// -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`. This operation also returns a -// tensor `idx` the same size as `x` that contains the index of each value of `x` -// in the unique output `y`. Finally, it returns a third tensor `count` that -// contains the count of each element of `y` in `x`. In other words: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// For example: -// -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx, count = unique_with_counts(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// count ==> [2, 1, 3, 1, 2] -// ``` +// Performs 3D average pooling on the input. // // Arguments: -// x: 1-D. +// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. // -// Returns 1-D.1-D.1-D. -func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { +// Returns The average pooled output tensor. 
+func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "UniqueWithCounts", + Type: "AvgPool3D", Input: []tf.Input{ - x, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Creates a dataset that skips `count` elements from the `input_dataset`. +// Produces the max pool of the input tensor for quantized types. // // Arguments: +// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. +// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// ksize: The size of the window for each dimension of the input tensor. +// The length must be 4 to match the number of dimensions of the input. +// strides: The stride of the sliding window for each dimension of the input +// tensor. The length must be 4 to match the number of dimensions of the input. +// padding: The type of padding algorithm to use. // -// count: A scalar representing the number of elements from the `input_dataset` -// that should be skipped. If count is -1, skips everything. -// -// -func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. 
+func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "SkipDataset", + Type: "QuantizedMaxPool", Input: []tf.Input{ - input_dataset, count, + input, min_input, max_input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// ComplexAttr is an optional argument to Complex. -type ComplexAttr func(optionalAttr) - -// ComplexTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_COMPLEX64 -func ComplexTout(value tf.DataType) ComplexAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} +// AvgPool3DGradAttr is an optional argument to AvgPool3DGrad. +type AvgPool3DGradAttr func(optionalAttr) -// Converts two real numbers to a complex number. -// -// Given a tensor `real` representing the real part of a complex number, and a -// tensor `imag` representing the imaginary part of a complex number, this -// operation returns complex numbers elementwise of the form \\(a + bj\\), where -// *a* represents the `real` part and *b* represents the `imag` part. -// -// The input tensors `real` and `imag` must have the same shape. -// -// For example: +// AvgPool3DGradDataFormat sets the optional data_format attribute to value. 
// -// ``` -// # tensor 'real' is [2.25, 3.25] -// # tensor `imag` is [4.75, 5.75] -// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] -// ``` -func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Complex", - Input: []tf.Input{ - real, imag, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ImagAttr is an optional argument to Imag. -type ImagAttr func(optionalAttr) - -// ImagTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func ImagTout(value tf.DataType) ImagAttr { +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func AvgPool3DGradDataFormat(value string) AvgPool3DGradAttr { return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Returns the imaginary part of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the imaginary part of each element in `input`. All -// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part returned by this operation. 
-// -// For example: -// -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.imag(input) ==> [4.75, 5.75] -// ``` -func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Imag", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that emits the lines of one or more text files. -// -// Arguments: -// filenames: A scalar or a vector containing the name(s) of the file(s) to be -// read. -// compression_type: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// buffer_size: A scalar containing the number of bytes to buffer. -func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TextLineDataset", - Input: []tf.Input{ - filenames, compression_type, buffer_size, - }, + m["data_format"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns the number of records this Reader has produced. -// -// This is the same as the number of ReaderRead executions that have -// succeeded. +// Computes gradients of average pooling function. // // Arguments: -// reader_handle: Handle to a Reader. -func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumRecordsProducedV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes exponential of x - 1 element-wise. -// -// I.e., \\(y = (\exp x) - 1\\). 
-func Expm1(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Expm1", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x - y element-wise. +// orig_input_shape: The original input dimensions. +// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. // -// *NOTE*: `Sub` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Returns The backprop for input. +func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DGradAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Sub", + Type: "AvgPool3DGrad", Input: []tf.Input{ - x, y, + orig_input_shape, grad, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -24870,133 +25149,6 @@ func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Ou return scope.AddOperation(opspec) } -// Records the bytes size of each element of `input_dataset` in a StatsAggregator. 
-func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "BytesProducedStatsDataset", - Input: []tf.Input{ - input_dataset, tag, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QrAttr is an optional argument to Qr. -type QrAttr func(optionalAttr) - -// QrFullMatrices sets the optional full_matrices attribute to value. -// -// value: If true, compute full-sized `q` and `r`. If false -// (the default), compute only the leading `P` columns of `q`. -// If not specified, defaults to false -func QrFullMatrices(value bool) QrAttr { - return func(m optionalAttr) { - m["full_matrices"] = value - } -} - -// Computes the QR decompositions of one or more matrices. -// -// Computes the QR decomposition of each inner matrix in `tensor` such that -// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])` -// -// ```python -// # a is a tensor. -// # q is a tensor of orthonormal matrices. -// # r is a tensor of upper triangular matrices. -// q, r = qr(a) -// q_full, r_full = qr(a, full_matrices=True) -// ``` -// -// Arguments: -// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. -// -// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then -// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is -// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is -// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`. 
-func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Qr", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// AudioSummaryAttr is an optional argument to AudioSummary. -type AudioSummaryAttr func(optionalAttr) - -// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value. -// -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr { - return func(m optionalAttr) { - m["max_outputs"] = value - } -} - -// Outputs a `Summary` protocol buffer with audio. -// -// DEPRECATED at GraphDef version 15: Use AudioSummaryV2. -// -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. -// -// Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. -// -// Returns Scalar. Serialized `Summary` protocol buffer. 
-func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"sample_rate": sample_rate} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AudioSummary", - Input: []tf.Input{ - tag, tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // BiasAddAttr is an optional argument to BiasAdd. type BiasAddAttr func(optionalAttr) @@ -25198,74 +25350,195 @@ func LogUniformCandidateSamplerSeed(value int64) LogUniformCandidateSamplerAttr } } -// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Generates labels for candidate sampling with a log-uniform distribution. +// +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. +// +// For each batch, this op picks a single set of sampled candidate labels. +// +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. +// +// Arguments: +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to randomly sample. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. 
This requires some approximation to +// estimate the post-rejection sampling probabilities. +// range_max: The sampler will sample integers from the interval [0, range_max). +// +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. +func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LogUniformCandidateSampler", + Input: []tf.Input{ + true_classes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Returns the truth value of (x < y) element-wise. +// +// *NOTE*: `Less` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Less", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// FakeQuantWithMinMaxVarsGradientAttr is an optional argument to FakeQuantWithMinMaxVarsGradient. 
+type FakeQuantWithMinMaxVarsGradientAttr func(optionalAttr) + +// FakeQuantWithMinMaxVarsGradientNumBits sets the optional num_bits attribute to value. +// +// value: The bitwidth of the quantization; between 2 and 8, inclusive. +// If not specified, defaults to 8 +func FakeQuantWithMinMaxVarsGradientNumBits(value int64) FakeQuantWithMinMaxVarsGradientAttr { + return func(m optionalAttr) { + m["num_bits"] = value + } +} + +// FakeQuantWithMinMaxVarsGradientNarrowRange sets the optional narrow_range attribute to value. +// +// value: Whether to quantize into 2^num_bits - 1 distinct values. +// If not specified, defaults to false +func FakeQuantWithMinMaxVarsGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsGradientAttr { + return func(m optionalAttr) { + m["narrow_range"] = value + } +} + +// Compute gradients for a FakeQuantWithMinMaxVars operation. +// +// Arguments: +// gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation. +// inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation. +// min, max: Quantization interval, scalar floats. +// +// +// +// Returns Backpropagated gradients w.r.t. inputs: +// `gradients * (inputs >= min && inputs <= max)`.Backpropagated gradients w.r.t. min parameter: +// `sum(gradients * (inputs < min))`.Backpropagated gradients w.r.t. max parameter: +// `sum(gradients * (inputs > max))`. 
+func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FakeQuantWithMinMaxVarsGradient", + Input: []tf.Input{ + gradients, inputs, min, max, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2. +type MaxPoolGradV2Attr func(optionalAttr) + +// MaxPoolGradV2DataFormat sets the optional data_format attribute to value. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr { return func(m optionalAttr) { - m["seed2"] = value + m["data_format"] = value } } -// Generates labels for candidate sampling with a log-uniform distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. 
+// Computes gradients of the maxpooling function. // // Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients w.r.t. the output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { +// Returns Gradients w.r.t. the input to `max_pool`. 
+func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + attrs := map[string]interface{}{"padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "LogUniformCandidateSampler", + Type: "MaxPoolGradV2", Input: []tf.Input{ - true_classes, + orig_input, orig_output, grad, ksize, strides, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Returns the truth value of (x < y) element-wise. +// Returns the min of x and y (i.e. x < y ? x : y) element-wise. // -// *NOTE*: `Less` supports broadcasting. More about broadcasting +// *NOTE*: `Minimum` supports broadcasting. More about broadcasting // [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Less", + Type: "Minimum", Input: []tf.Input{ x, y, }, @@ -27136,127 +27409,6 @@ func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, val return scope.AddOperation(opspec) } -// FakeQuantWithMinMaxVarsGradientAttr is an optional argument to FakeQuantWithMinMaxVarsGradient. -type FakeQuantWithMinMaxVarsGradientAttr func(optionalAttr) - -// FakeQuantWithMinMaxVarsGradientNumBits sets the optional num_bits attribute to value. -// -// value: The bitwidth of the quantization; between 2 and 8, inclusive. 
-// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsGradientNumBits(value int64) FakeQuantWithMinMaxVarsGradientAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxVarsGradientNarrowRange sets the optional narrow_range attribute to value. -// -// value: Whether to quantize into 2^num_bits - 1 distinct values. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsGradientAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Compute gradients for a FakeQuantWithMinMaxVars operation. -// -// Arguments: -// gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation. -// inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation. -// min, max: Quantization interval, scalar floats. -// -// -// -// Returns Backpropagated gradients w.r.t. inputs: -// `gradients * (inputs >= min && inputs <= max)`.Backpropagated gradients w.r.t. min parameter: -// `sum(gradients * (inputs < min))`.Backpropagated gradients w.r.t. max parameter: -// `sum(gradients * (inputs > max))`. -func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVarsGradient", - Input: []tf.Input{ - gradients, inputs, min, max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2. -type MaxPoolGradV2Attr func(optionalAttr) - -// MaxPoolGradV2DataFormat sets the optional data_format attribute to value. 
-// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients w.r.t. the output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients w.r.t. the input to `max_pool`. -func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradV2", - Input: []tf.Input{ - orig_input, orig_output, grad, ksize, strides, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the min of x and y (i.e. x < y ? x : y) element-wise. -// -// *NOTE*: `Minimum` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Minimum", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Creates a dataset that asynchronously prefetches elements from `input_dataset`. // // Arguments: @@ -27301,57 +27453,3 @@ func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_s op := scope.AddOperation(opspec) return op.Output(0) } - -// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. -type AudioSummaryV2Attr func(optionalAttr) - -// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. -// -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { - return func(m optionalAttr) { - m["max_outputs"] = value - } -} - -// Outputs a `Summary` protocol buffer with audio. -// -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. -// -// Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. -// -// Returns Scalar. 
Serialized `Summary` protocol buffer. -func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AudioSummaryV2", - Input: []tf.Input{ - tag, tensor, sample_rate, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} -- GitLab From 859df2a2a1bdfb02cf370f7b68e3c6802e822b15 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 17 Nov 2017 12:32:58 -0800 Subject: [PATCH 0061/1225] Remove the existence of unused HloProtos. PiperOrigin-RevId: 176145413 --- tensorflow/compiler/xla/service/hlo.proto | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 79493c4112..e984bdb5f7 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -250,7 +250,3 @@ message HloProto { HloOrderingProto hlo_ordering = 2; BufferAssignmentProto buffer_assignment = 3; } - -message HloProtos { - repeated HloProto hlo_protos = 1; -} -- GitLab From a715b06555a0c14e95f30569f40a97019af6a6b0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 12:34:28 -0800 Subject: [PATCH 0062/1225] [XLA:CPU/GPU] Revert back to previous buffer aliasing calculation for fused DynamicUpdateSlice in-place updates (fused instructions compared in current calculation are not assigned buffers, so I think the current calculation is always returning false). 
PiperOrigin-RevId: 176145589 --- tensorflow/compiler/xla/service/llvm_ir/ops.h | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/ops.h b/tensorflow/compiler/xla/service/llvm_ir/ops.h index 11e84d9cb5..f72f482e31 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ops.h +++ b/tensorflow/compiler/xla/service/llvm_ir/ops.h @@ -40,11 +40,24 @@ bool CanUpdateDynamicSliceInPlace(HloInstruction* dynamic_update_slice, inline bool CanEmitFusedDynamicUpdateSliceInPlace( HloInstruction* fusion, const BufferAssignment& assignment) { CHECK_EQ(fusion->opcode(), HloOpcode::kFusion); - return fusion->fusion_kind() == HloInstruction::FusionKind::kLoop && - fusion->fused_expression_root()->opcode() == - HloOpcode::kDynamicUpdateSlice && - CanUpdateDynamicSliceInPlace(fusion->fused_expression_root(), - assignment); + HloInstruction* fused_root = fusion->fused_expression_root(); + if (fused_root->opcode() != HloOpcode::kDynamicUpdateSlice || + fusion->fusion_kind() != HloInstruction::FusionKind::kLoop) { + return false; + } + // Walk DynamicUpdateSlice operand(0) to fused parameter and get its + // associated operand. See if it shares an allocation with this operand. 
+ HloInstruction* fusion_operand; + ShapeIndex index; + std::tie(fusion_operand, index) = + fused_root->mutable_operand(0)->LatestNonGteAncestorAndIndex(); + if (fusion_operand->opcode() != HloOpcode::kParameter) { + return false; + } + auto* operand = fusion->operand(fusion_operand->parameter_number()); + return assignment.HasAllocationAt(operand, index) && + assignment.HasAllocationAt(fusion, {}) && + assignment.SharesSliceAtIndex(fusion, {}, operand, index); } // Emits IR for running the given dynamic-update-slice op in-place -- that is, -- GitLab From 6a7cdfa8c973f3ce6a31664233fc8b096f2ba393 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 17 Nov 2017 12:46:18 -0800 Subject: [PATCH 0063/1225] Improved shape inference PiperOrigin-RevId: 176147013 --- .../core/common_runtime/shape_refiner.cc | 31 +- .../core/common_runtime/shape_refiner_test.cc | 15 +- tensorflow/core/framework/shape_inference.cc | 60 +- tensorflow/core/framework/shape_inference.h | 27 +- .../core/framework/shape_inference_test.cc | 13 +- .../core/grappler/costs/graph_properties.cc | 669 ++++++++++++------ .../core/grappler/costs/graph_properties.h | 50 +- .../grappler/costs/graph_properties_test.cc | 23 +- .../while_loop.pbtxt | 20 +- 9 files changed, 591 insertions(+), 317 deletions(-) diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 10901da192..d66865e45b 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -335,10 +335,14 @@ Status ShapeRefiner::UpdateNode(const Node* node, bool relax, bool* refined) { InferenceContext* c = iter->second->get_context(); DCHECK_GE(dst_input, 0); ShapeHandle existing_input = node_context->input(dst_input); - if (!relax && node_context->MergeInput(dst_input, c->output(src_output)) && - !existing_input.SameHandle(node_context->input(dst_input))) { - *refined = true; - } else if (relax) { + if (!relax) { + if 
(node_context->MergeInput(dst_input, c->output(src_output))) { + if (!SameDefinedShape(node_context, node_context->input(dst_input), + existing_input)) { + *refined = true; + } + } + } else { if (node_context->RelaxInput(dst_input, c->output(src_output))) { if (!SameDefinedShape(node_context, node_context->input(dst_input), existing_input)) { @@ -865,15 +869,22 @@ Status ShapeRefiner::RunShapeFn(const Node* node, bool ShapeRefiner::SameDefinedShape(InferenceContext* c, ShapeHandle s0, ShapeHandle s1) { - if (!c->RankKnown(s0)) { - return !c->RankKnown(s1); - } else if (!c->RankKnown(s1) || c->Rank(s0) != c->Rank(s1)) { + if (s0.SameHandle(s1)) { + return true; + } + if (c->Rank(s0) != c->Rank(s1)) { + return false; + } + if (!c->RankKnown(s0) && !c->RankKnown(s1)) { return false; } - for (int i = 0; i < c->Rank(s0); ++i) { - if (c->Value(c->Dim(s0, i)) != c->Value(c->Dim(s1, i))) { - return false; + if (!c->Dim(s0, i).SameHandle(c->Dim(s1, i))) { + int64 val0 = c->Value(c->Dim(s0, i)); + int64 val1 = c->Value(c->Dim(s1, i)); + if (val0 < 0 || val1 < 0 || val0 != val1) { + return false; + } } } diff --git a/tensorflow/core/common_runtime/shape_refiner_test.cc b/tensorflow/core/common_runtime/shape_refiner_test.cc index ff32e855d5..e4eef1dbe2 100644 --- a/tensorflow/core/common_runtime/shape_refiner_test.cc +++ b/tensorflow/core/common_runtime/shape_refiner_test.cc @@ -1161,11 +1161,13 @@ TEST_F(ShapeRefinerTest, SameDefinedShape) { auto s_unknown_2 = ctx->MakeShape({-1, 2}); auto s_unknown_2_b = ctx->MakeShape({-1, 2}); - EXPECT_TRUE(SameDefinedShape(ctx, unknown, unknown_b)); + EXPECT_TRUE(SameDefinedShape(ctx, unknown, unknown)); + EXPECT_FALSE(SameDefinedShape(ctx, unknown, unknown_b)); EXPECT_FALSE(SameDefinedShape(ctx, unknown, s_1_2)); EXPECT_TRUE(SameDefinedShape(ctx, s_1_2, s_1_2_b)); EXPECT_FALSE(SameDefinedShape(ctx, s_1_2, s_2_2)); - EXPECT_TRUE(SameDefinedShape(ctx, s_unknown_2, s_unknown_2_b)); + EXPECT_TRUE(SameDefinedShape(ctx, s_unknown_2, 
s_unknown_2)); + EXPECT_FALSE(SameDefinedShape(ctx, s_unknown_2, s_unknown_2_b)); } TEST_F(ShapeRefinerTest, IsUpdatedShapesOrTypes) { @@ -1178,14 +1180,15 @@ TEST_F(ShapeRefinerTest, IsUpdatedShapesOrTypes) { TF_ASSERT_OK(m.AddNode(test)); shape_inference::InferenceContext* ctx = m.GetContext(test); + shape_inference::ShapeHandle unknown = ctx->UnknownShape(); std::vector t0{ {ctx->MakeShape({1, 2, 3}), DT_FLOAT}, - {ctx->UnknownShape(), DT_INVALID}, + {unknown, DT_INVALID}, {ctx->MakeShape({4, 3, 2, 1}), DT_INT32}}; std::vector t1{ {ctx->MakeShape({1, 2, 3}), DT_FLOAT}, - {ctx->UnknownShape(), DT_INVALID}, + {unknown, DT_INVALID}, {ctx->MakeShape({4, 3, 2, 1}), DT_INT32}}; std::vector t2{ @@ -1256,10 +1259,10 @@ TEST_F(ShapeRefinerTest, IncrementalUpdates) { 0, std::vector{{shp, DT_FLOAT}}); refined = false; TF_ASSERT_OK(m.UpdateNode(dequeue, true /* relax */, &refined)); - EXPECT_FALSE(refined); + EXPECT_TRUE(refined); ctx = m.GetContext(dequeue); EXPECT_EQ("[?,7]", ctx->DebugString(ctx->output(0))); - EXPECT_FALSE(SameHandle(ctx->Dim(ctx->output(0), 0), ctx->Dim(shp, 0))); + EXPECT_TRUE(SameHandle(ctx->Dim(ctx->output(0), 0), ctx->Dim(shp, 0))); // Inject a shape of the same handle and expect refined to not change. 
ctx = m.GetContext(queue); diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index fe0742e1db..f30272e250 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -403,15 +403,28 @@ Status InferenceContext::WithValue(DimensionHandle dim, int64 value, existing); } -void InferenceContext::Relax(DimensionHandle d0, DimensionHandle d1, +void InferenceContext::Relax(DimensionHandle d_old, DimensionHandle d_new, DimensionHandle* out) { - if (d0.SameHandle(d1)) { - *out = d0; - } else if (!ValueKnown(d0) || !ValueKnown(d1)) { - *out = UnknownDim(); - } else if (Value(d0) == Value(d1)) { - *out = d0; + if (d_old.SameHandle(d_new)) { + *out = d_old; + } else if (!ValueKnown(d_old) && !ValueKnown(d_new)) { + // The node will be fed by the dimension d_new instead of d_old: any + // equality assertion between d_old and other input dimension on this node + // may not be true anymore, so forget them all. + ForgetMerges(); + // Return the new shape handle to force the relaxation to propagate to the + // fanout of the context. + *out = d_new; + } else if (!ValueKnown(d_new)) { + ForgetMerges(); + *out = d_new; + } else if (Value(d_old) == Value(d_new)) { + // Return the old shape handle. This will stop the relaxation in the fanout + // of the context. + *out = d_old; } else { + // Return a new handle that encodes a different unknown dim. 
+ ForgetMerges(); *out = UnknownDim(); } } @@ -463,45 +476,48 @@ Status InferenceContext::MergePrefix(ShapeHandle s, ShapeHandle prefix, return Status::OK(); } -void InferenceContext::Relax(ShapeHandle s0, ShapeHandle s1, ShapeHandle* out) { - if (s0.SameHandle(s1)) { - *out = s0; +void InferenceContext::Relax(ShapeHandle s_old, ShapeHandle s_new, + ShapeHandle* out) { + if (s_old.SameHandle(s_new)) { + *out = s_old; return; - } else if (!RankKnown(s0) || !RankKnown(s1)) { - *out = UnknownShape(); + } else if (!RankKnown(s_new) || !s_old.IsSet()) { + ForgetMerges(); + *out = s_new; return; } - const int32 rank = Rank(s0); - if (rank != Rank(s1)) { + const int32 rank = Rank(s_old); + if (rank != Rank(s_new)) { + ForgetMerges(); *out = UnknownShape(); return; } - bool return_s0 = true; + bool return_s_old = true; for (int i = 0; i < rank; ++i) { - auto d0 = Dim(s0, i); - auto d1 = Dim(s1, i); + auto d0 = Dim(s_old, i); + auto d1 = Dim(s_new, i); if (d0.SameHandle(d1)) continue; auto v0 = Value(d0); auto v1 = Value(d1); if (v0 == kUnknownDim || v1 == kUnknownDim || v0 != v1) { - return_s0 = false; + return_s_old = false; break; } } - if (return_s0) { - *out = s0; + if (return_s_old) { + *out = s_old; return; } // Relax dims. std::vector dims(rank); for (int i = 0; i < rank; ++i) { - // Invariant for relax was checked earlier, so CHECK is ok. 
- Relax(Dim(s0, i), Dim(s1, i), &dims[i]); + Relax(Dim(s_old, i), Dim(s_new, i), &dims[i]); } + ForgetMerges(); *out = MakeShape(dims); } diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index b12d37b4c0..4a4ef12635 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -62,7 +62,7 @@ class DimensionHandle { private: DimensionHandle(const Dimension* dim) { ptr_ = dim; } - const Dimension* operator->() { return ptr_; } + const Dimension* operator->() const { return ptr_; } bool IsSet() const { return ptr_ != nullptr; } const Dimension* ptr_ = nullptr; @@ -104,7 +104,7 @@ class ShapeHandle { private: ShapeHandle(const Shape* shape) { ptr_ = shape; } - const Shape* operator->() { return ptr_; } + const Shape* operator->() const { return ptr_; } bool IsSet() const { return ptr_ != nullptr; } const Shape* ptr_ = nullptr; @@ -678,14 +678,17 @@ class InferenceContext { // Adds additional context to the given status. Status AttachContext(const Status& status); - // Relaxes and and returns the relaxed dimension in <*out>. If - // and have incompatible values, returns an error. + // Relaxes an existing value with a new value and returns the + // relaxed dimension in <*out>. If and have incompatible + // values, returns an error. // - // Note that <*out> may be set to or . - void Relax(DimensionHandle d0, DimensionHandle d1, DimensionHandle* out); - // Relaxes and and returns the relaxed shape in <*out>. See - // 'RelaxInput' function for full details and examples. - void Relax(ShapeHandle s0, ShapeHandle s1, ShapeHandle* out); + // Note that <*out> may be set to or . + void Relax(DimensionHandle d_old, DimensionHandle d_new, + DimensionHandle* out); + // Relaxes an existing shape with a new shape and returns the + // relaxed shape in <*out>. See 'RelaxInput' function for full details and + // examples. 
+ void Relax(ShapeHandle s_old, ShapeHandle s_new, ShapeHandle* out); // Used to implement MergeInputHandleShapesAndTypes and // MergeOutputHandleShapesAndTypes. @@ -698,6 +701,12 @@ class InferenceContext { const std::vector& shapes_and_types, std::vector* to_update) TF_MUST_USE_RESULT; + // Forget all the previous merged shapes and dims. + void ForgetMerges() { + merged_shapes_.clear(); + merged_dims_.clear(); + } + ShapeManager shape_manager_; // inputs_, outputs_, and input_tensors_as_shapes_ refer to values from diff --git a/tensorflow/core/framework/shape_inference_test.cc b/tensorflow/core/framework/shape_inference_test.cc index d03cc8ce6d..68156e63ca 100644 --- a/tensorflow/core/framework/shape_inference_test.cc +++ b/tensorflow/core/framework/shape_inference_test.cc @@ -544,9 +544,10 @@ TEST_F(ShapeInferenceTest, RelaxDim) { auto d_unknown_b = c.Dim(c.input(0), 4); DimensionHandle out; - // Relaxing anything with unknown returns a new unknown. + // Relaxing anything with unknown returns a new unknown or the existing + // unknown. Relax(&c, d2, d_unknown, &out); - EXPECT_FALSE(SameHandle(d_unknown, out)); + EXPECT_TRUE(SameHandle(d_unknown, out)); EXPECT_FALSE(SameHandle(d_unknown_b, out)); EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(out)); Relax(&c, d_unknown, d2, &out); @@ -554,7 +555,7 @@ TEST_F(ShapeInferenceTest, RelaxDim) { EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(out)); Relax(&c, d_unknown, d_unknown_b, &out); EXPECT_FALSE(SameHandle(d_unknown, out)); - EXPECT_FALSE(SameHandle(d_unknown_b, out)); + EXPECT_TRUE(SameHandle(d_unknown_b, out)); EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(out)); // Relaxing with self returns self. 
@@ -602,7 +603,7 @@ TEST_F(ShapeInferenceTest, RelaxShape) { EXPECT_EQ("?", c.DebugString(out)); Relax(&c, s_unknown, s_unknown_b, &out); EXPECT_FALSE(SameHandle(s_unknown, out)); - EXPECT_FALSE(SameHandle(s_unknown_b, out)); + EXPECT_TRUE(SameHandle(s_unknown_b, out)); EXPECT_EQ("?", c.DebugString(out)); // Relaxing with self returns self. @@ -623,7 +624,7 @@ TEST_F(ShapeInferenceTest, RelaxShape) { Relax(&c, s_u_2, s_1_u, &out); EXPECT_EQ("[?,?]", c.DebugString(out)); EXPECT_FALSE(SameHandle(c.Dim(s_u_2, 0), c.Dim(out, 0))); - EXPECT_FALSE(SameHandle(c.Dim(s_1_u, 1), c.Dim(out, 1))); + EXPECT_TRUE(SameHandle(c.Dim(s_1_u, 1), c.Dim(out, 1))); auto s_u1 = c.UnknownShapeOfRank(1); auto s_u2 = c.UnknownShapeOfRank(1); Relax(&c, s_u1, s_u2, &out); @@ -637,7 +638,7 @@ TEST_F(ShapeInferenceTest, RelaxShape) { EXPECT_EQ("[?,?]", c.DebugString(out)); out = s_unknown; Relax(&c, s_1_3, s_u_2, &out); - EXPECT_FALSE(SameHandle(c.Dim(s_u_2, 0), c.Dim(out, 0))); + EXPECT_TRUE(SameHandle(c.Dim(s_u_2, 0), c.Dim(out, 0))); EXPECT_EQ("[?,?]", c.DebugString(out)); out = s_unknown; diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index d33d86df3a..46c6841023 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -249,106 +249,252 @@ typename DisjointSet::Rep* DisjointSet::Find(Handle value) { return root; } -// If a Merge node has a NextIteration node as an input then that input will -// try to forward an UnknownShape at graph construction time. However, the -// Merge shape function will always propagate an UnknownShape if any of its -// inputs are UnknownShapes. So we need to ignore the input from NextIteration -// nodes to propagate any known shape from the Merge node. 
-Status ShapeOfMergeNode(const Node* node, InferenceContext* c) { - ShapeHandle out = c->input(0); - if (!c->RankKnown(out)) { - out = c->UnknownShape(); - } else { - int32 rank = c->Rank(out); - for (const Edge* e : node->in_edges()) { - if (e->src()->IsNextIteration() || e->dst_input() <= 0) { - continue; - } - ShapeHandle input = c->input(e->dst_input()); - if (!c->RankKnown(input) || c->Rank(input) != rank) { - out = c->UnknownShape(); - break; - } +bool IsQueue(const Node& node) { + StringPiece type(node.type_string()); + return type.ends_with("QueueV2"); +} + +// Returns true if the node is an Enter op AND its input is a Queue. +bool IsEnterWithQueue(const Node& node) { + if (node.IsEnter()) { + const Node* in_node; + TF_CHECK_OK(node.input_node(0, &in_node)); + return IsQueue(*in_node); + } + return false; +} + +} // namespace + +// Queue of nodes to process. Nodes can be enqueued in any order, but will be +// dequeued in (roughly) topological order. Propagating shapes following a +// topological ordering isn't required for correctness but helps speed things up +// since it avoids processing the same node multiple times as its inputs +// information is refined. +class TopoQueue { + public: + void push(const Node* n) { queue_.insert(n); } + const Node* pop() { + CHECK(!empty()); + auto it = queue_.begin(); + const Node* n = *it; + queue_.erase(it); + return n; + } + + bool empty() const { return queue_.empty(); } + private: + // Graph nodes are created in (roughly) topological order. Therefore we can + // use their id to ensure they're sorted topologically. + struct CompareNodes { + bool operator()(const Node* lhs, const Node* rhs) const { + return lhs->id() > rhs->id(); + } + }; + std::set queue_; +}; + +// Merge and relax symbolic shapes. +// Each symbolic shape or dimension is represented by a handle. 
Unlike the TF +// shape refiner which creates new handles every time it processes an unknown +// shape/dimension, the symbolic shape refiner assigns a specific handle to each +// unknown shape/dimension of a given node. +class SymbolicShapeRefiner { + public: + explicit SymbolicShapeRefiner(ShapeRefiner* shape_refiner) + : shape_refiner_(shape_refiner) {} + + InferenceContext* GetContext(const Node* node) { + return shape_refiner_->GetContext(node); + } + Status UpdateNode(const Node* node, bool relax, bool* refined) { + return shape_refiner_->UpdateNode(node, relax, refined); + } + Status SetShape(const Node* node, int output_port, + shape_inference::ShapeHandle shape) { + return shape_refiner_->SetShape(node, output_port, shape); + } + + struct ShapeId { + const Node* node; + int port_id; + bool operator==(const ShapeId& other) const { + return node == other.node && port_id == other.port_id; + } + }; + struct HashShapeId { + std::size_t operator()(const ShapeId& shp) const { + return std::hash{}(shp.node) + shp.port_id; + } + }; + + struct DimId { + const Node* node; + int port_id; + int dim_index; + bool operator==(const DimId& other) const { + return node == other.node && port_id == other.port_id && + dim_index == other.dim_index; + } + }; + + struct HashDimId { + std::size_t operator()(const DimId& dim) const { + return std::hash{}(dim.node) + dim.port_id + dim.dim_index; + } + }; + + // Compute the shape of the tensors outputed by node 'node' at output port + // 'port_index' as the intersection of shape1 and shape2. + ShapeHandle OutputAsIntersection(const Node* node, int port_index, + ShapeHandle shape1, ShapeHandle shape2) { + if (shape1.SameHandle(shape2)) { + return shape1; + } + InferenceContext* ctx = shape_refiner_->GetContext(node); + ShapeHandle merged = shape1; + if (!ctx->RankKnown(shape2) && !ctx->RankKnown(shape1)) { + // Return either one since they're expected to represent the same value. 
+ return shape1; + } else if (!ctx->RankKnown(shape2) && ctx->RankKnown(shape1)) { + return shape1; + } else if (ctx->RankKnown(shape2) && !ctx->RankKnown(shape1)) { + return shape2; + } else { + const int rank = ctx->Rank(shape1); + if (ctx->Rank(shape2) != rank) { + // We detected an inconsistency, return an unknown shape. This can + // happen in the fanout of a merge node since during the initial + // propagation we optimistically assume that all the inputs to the merge + // node have the same shape. + return GetUnknownOutputShape(node, port_index); + } for (int d = 0; d < rank; ++d) { - if (c->Value(c->Dim(input, d)) != c->Value(c->Dim(out, d))) { - TF_RETURN_IF_ERROR(c->ReplaceDim(out, d, c->UnknownDim(), &out)); + if (!ctx->Dim(shape1, d).SameHandle(ctx->Dim(shape2, d))) { + if (ctx->Value(ctx->Dim(shape1, d)) != + ctx->Value(ctx->Dim(shape2, d))) { + DimensionHandle new_dim; + if (ctx->Value(ctx->Dim(shape1, d)) < 0) { + new_dim = ctx->Dim(shape2, d); + } else if (ctx->Value(ctx->Dim(shape2, d)) < 0) { + new_dim = ctx->Dim(shape1, d); + } else { + new_dim = GetUnknownOutputDim(node, port_index, d); + } + TF_CHECK_OK(ctx->ReplaceDim(merged, d, new_dim, &merged)); + } } } } + return merged; } - c->set_output(0, out); - c->set_output(1, c->Scalar()); - return Status::OK(); -} -// Manually propagate the input shape for Enter nodes and update any Merge node -// outputs. 
-Status UpdateEnter(ShapeRefiner* shape_refiner, const Node* node, bool relax, - std::queue* new_shapes) { - auto enter_ctx = shape_refiner->GetContext(node); - CHECK_NE(enter_ctx, nullptr); - for (int i = 0; i < enter_ctx->num_outputs(); i++) { - TF_RETURN_IF_ERROR(shape_refiner->SetShape(node, i, enter_ctx->input(0))); - } - for (const Edge* e : node->out_edges()) { - Node* dst = e->dst(); - if (dst->IsMerge()) { - bool updated = false; - TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(dst, relax, &updated)); - if (!updated) { - continue; + // Compute the shape of the tensors outputed by node 'node' at output port + // 'port_index' as the union of shape1 and shape2. + ShapeHandle OutputAsUnion(const Node* node, int port_index, + ShapeHandle shape1, ShapeHandle shape2) { + if (shape1.SameHandle(shape2)) { + return shape1; + } + InferenceContext* ctx = shape_refiner_->GetContext(node); + ShapeHandle relaxed = shape1; + const int rank = ctx->Rank(shape1); + if (!ctx->RankKnown(shape2) || ctx->Rank(shape2) != rank) { + relaxed = GetUnknownOutputShape(node, port_index); + } else { + for (int d = 0; d < rank; ++d) { + if (!ctx->Dim(shape1, d).SameHandle(ctx->Dim(shape2, d))) { + int64 val1 = ctx->Value(ctx->Dim(shape1, d)); + int64 val2 = ctx->Value(ctx->Dim(shape2, d)); + if (val1 != val2 || (val1 < 0 && val2 < 0)) { + DimensionHandle new_dim = GetUnknownOutputDim(node, port_index, d); + TF_CHECK_OK(ctx->ReplaceDim(relaxed, d, new_dim, &relaxed)); + } + } } - InferenceContext* merge_ctx = shape_refiner->GetContext(dst); - CHECK_NE(merge_ctx, nullptr); - TF_RETURN_IF_ERROR(ShapeOfMergeNode(dst, merge_ctx)); - new_shapes->push(dst); } + return relaxed; } - return Status::OK(); -} -// Propagates the shapes in the transitive fan-out of . 
-Status PropagateShapes(ShapeRefiner* shape_refiner, bool relax, - std::queue* new_shapes) { - while (!new_shapes->empty()) { - const Node* n = new_shapes->front(); - new_shapes->pop(); - for (const Node* fanout : n->out_nodes()) { - bool updated = false; - TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(fanout, relax, &updated)); - if (fanout->IsEnter()) { - TF_RETURN_IF_ERROR( - UpdateEnter(shape_refiner, fanout, relax, new_shapes)); - } else if (updated) { - // We want to avoid propagating through loops on the merge pass because - // the shapes are not guaranteed to converge. - if (!relax && fanout->IsNextIteration()) { + bool EquivalentShapes(ShapeHandle s1, ShapeHandle s2) const { + if (s1.SameHandle(s2)) { + return true; + } + if (InferenceContext::Rank(s1) != InferenceContext::Rank(s2)) { + return false; + } + if (!InferenceContext::RankKnown(s1) && !InferenceContext::RankKnown(s2)) { + return true; + } + const int rank = InferenceContext::Rank(s1); + for (int i = 0; i < rank; ++i) { + if (!InferenceContext::DimKnownRank(s1, i).SameHandle( + InferenceContext::DimKnownRank(s2, i))) { + int64 val1 = + InferenceContext::Value(InferenceContext::DimKnownRank(s1, i)); + int64 val2 = + InferenceContext::Value(InferenceContext::DimKnownRank(s2, i)); + if (val1 >= 0 && val2 >= 0 && val1 == val2) { continue; } - new_shapes->push(fanout); + return false; } } + return true; } - return Status::OK(); -} -bool IsQueue(const Node& node) { - StringPiece type(node.type_string()); - return type.ends_with("QueueV2"); -} + bool EquivalentShapesAndTypes(const std::vector& st1, + const std::vector& st2) const { + if (st1.size() != st2.size()) { + return false; + } + for (int i = 0; i < st1.size(); ++i) { + const ShapeAndType& s1 = st1[i]; + const ShapeAndType& s2 = st2[i]; + if (s1.dtype != s2.dtype) { + return false; + } + if (!EquivalentShapes(s1.shape, s2.shape)) { + return false; + } + } + return true; + } -// Returns true if the node is an Enter op AND its input is a Queue. 
-bool IsEnterWithQueue(const Node& node) { - if (node.IsEnter()) { - const Node* in_node; - TF_CHECK_OK(node.input_node(0, &in_node)); - return IsQueue(*in_node); + private: + // Return the one ShapeHandle used to denote a fully unknown shape for a node + // output. + ShapeHandle GetUnknownOutputShape(const Node* node, int index) { + ShapeId id{node, index}; + auto it = unknown_shapes_.find(id); + if (it != unknown_shapes_.end()) { + return it->second; + } + InferenceContext* c = shape_refiner_->GetContext(node); + ShapeHandle shp = c->UnknownShape(); + unknown_shapes_[id] = shp; + return shp; + } + // Return the one ShapeHandle used to denote a fully unknown dimension for a + // node output. + DimensionHandle GetUnknownOutputDim(const Node* node, int index, int dim_id) { + DimId id{node, index, dim_id}; + auto it = unknown_dims_.find(id); + if (it != unknown_dims_.end()) { + return it->second; + } + InferenceContext* c = shape_refiner_->GetContext(node); + DimensionHandle dim = c->UnknownDim(); + unknown_dims_[id] = dim; + return dim; } - return false; -} -} // namespace + ShapeRefiner* shape_refiner_; + + std::unordered_map unknown_shapes_; + std::unordered_map unknown_dims_; +}; // Keep track of shapes and dimensions in a graph. 
// In particular, use disjoint sets to track equivalence between shapes and @@ -401,24 +547,9 @@ class SymbolicShapeManager { DisjointSet dims_; }; -void GraphProperties::Relax(InferenceContext* c, ShapeHandle s0, ShapeHandle s1, - ShapeHandle* out) { - c->Relax(s0, s1, out); -} - -bool GraphProperties::SameDefinedShape(InferenceContext* c, ShapeHandle s0, - ShapeHandle s1) { - return ShapeRefiner::SameDefinedShape(c, s0, s1); -} - -bool GraphProperties::IsUpdatedShapesOrTypes( - InferenceContext* c, const std::vector& existing, - const std::vector& updated) { - return ShapeRefiner::IsUpdatedShapesOrTypes(c, existing, updated); -} - Status GraphProperties::MergeEnqueueShapesAndTypes( - const std::vector& shapes_and_types, InferenceContext* qctx, + SymbolicShapeRefiner* shape_refiner, const Node* qnode, + const std::vector& shapes_and_types, std::vector* queue_shapes_and_types) { if (shapes_and_types.size() != queue_shapes_and_types->size()) { return errors::InvalidArgument( @@ -434,13 +565,14 @@ Status GraphProperties::MergeEnqueueShapesAndTypes( DataTypeString(b.dtype)); } - TF_RETURN_IF_ERROR(qctx->Merge(a.shape, b.shape, &b.shape)); + b.shape = shape_refiner->OutputAsIntersection(qnode, i, a.shape, b.shape); } return Status::OK(); } Status GraphProperties::RelaxEnqueueShapesAndMergeTypes( - const std::vector& shapes_and_types, InferenceContext* qctx, + SymbolicShapeRefiner* shape_refiner, const Node* qnode, + const std::vector& shapes_and_types, std::vector* queue_shapes_and_types) { if (shapes_and_types.size() != queue_shapes_and_types->size()) { return errors::InvalidArgument( @@ -456,11 +588,197 @@ Status GraphProperties::RelaxEnqueueShapesAndMergeTypes( DataTypeString(b.dtype)); } - Relax(qctx, a.shape, b.shape, &b.shape); + b.shape = shape_refiner->OutputAsUnion(qnode, i, a.shape, b.shape); } return Status::OK(); } +// If a Merge node has a NextIteration node as an input then that input will +// try to forward an UnknownShape at graph construction time. 
However, the +// Merge shape function will always propagate an UnknownShape if any of its +// inputs are UnknownShapes. So we need to ignore the input from NextIteration +// nodes to propagate any known shape from the Merge node. +Status GraphProperties::UpdateMergeNode(SymbolicShapeRefiner* shape_refiner, + const Node* node, bool relax, + TopoQueue* new_shapes) { + InferenceContext* c = shape_refiner->GetContext(node); + CHECK_NE(c, nullptr); + + ShapeHandle out; + bool out_initialized = false; + for (const Edge* e : node->in_edges()) { + if (e->IsControlEdge()) { + continue; + } + // Skip back edges during the initial propagation phase. This is equivalent + // to assuming that all the inputs to the merge nodes are fed by the same + // shape, and will be corrected as needed in the relaxation phase. + if (!relax && e->src()->IsNextIteration()) { + continue; + } + + InferenceContext* in = shape_refiner->GetContext(e->src()); + ShapeHandle input = in->output(e->src_output()); + if (relax) { + c->RelaxInput(e->dst_input(), input); + } else { + c->MergeInput(e->dst_input(), input); + } + if (!out_initialized) { + out_initialized = true; + out = input; + continue; + } + if (relax) { + out = shape_refiner->OutputAsUnion(node, 0, input, out); + } else { + out = shape_refiner->OutputAsIntersection(node, 0, input, out); + } + } + + if (!shape_refiner->EquivalentShapes(out, c->output(0))) { + c->set_output(0, out); + c->set_output(1, c->Scalar()); + new_shapes->push(node); + } + + return Status::OK(); +} + +// Manually propagate the input shape for Enter nodes and update any Merge node +// outputs. 
+Status GraphProperties::UpdateEnter(SymbolicShapeRefiner* shape_refiner, + const Node* node, bool relax, + TopoQueue* new_shapes) { + auto enter_ctx = shape_refiner->GetContext(node); + CHECK_NE(enter_ctx, nullptr); + + for (const Edge* e : node->in_edges()) { + if (e->IsControlEdge()) { + continue; + } + InferenceContext* in = shape_refiner->GetContext(e->src()); + ShapeHandle input = in->output(e->src_output()); + if (!enter_ctx->output(0).SameHandle(input)) { + if (relax) { + enter_ctx->RelaxInput(0, input); + } else { + enter_ctx->MergeInput(0, input); + } + enter_ctx->set_output(0, input); + new_shapes->push(node); + } + } + return Status::OK(); +} + +Status GraphProperties::UpdateShapes(SymbolicShapeRefiner* shape_refiner, + bool relax, const Node* n, + TopoQueue* new_shapes) { + if (n->IsEnter()) { + // The Enter shape function always forwards an UnknownShape, so do the right + // thing here. + TF_RETURN_IF_ERROR(UpdateEnter(shape_refiner, n, relax, new_shapes)); + } else if (n->IsMerge()) { + // Properly handle merge nodes. + TF_RETURN_IF_ERROR(UpdateMergeNode(shape_refiner, n, relax, new_shapes)); + } else { + // Rely on regular TF shape refinement for all the other nodes. + bool updated = false; + TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(n, relax, &updated)); + if (updated) { + // We want to avoid propagating through loops on the merge pass because + // the shapes are not guaranteed to converge. + if (relax || !n->IsNextIteration()) { + new_shapes->push(n); + } + } + } + return Status::OK(); +} + +// Propagates the shapes in the transitive fan-out of . 
+Status GraphProperties::PropagateShapes( + SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, + const std::unordered_map>& + resources) { + do { + while (!new_shapes->empty()) { + const Node* n = new_shapes->pop(); + for (const Node* fanout : n->out_nodes()) { + TF_RETURN_IF_ERROR( + UpdateShapes(shape_refiner, relax, fanout, new_shapes)); + } + } + + for (const auto& resource : resources) { + // Resources need special handling: since the enqueue nodes are in the + // fanout of the queues, we need to manually propagate the shapes from + // enqueue node to the corresponding queue. + TF_RETURN_IF_ERROR(UpdateResource(resource.first, resource.second, + shape_refiner, relax, new_shapes)); + } + } while (!new_shapes->empty()); + + return Status::OK(); +} + +Status GraphProperties::UpdateResource( + const Node* qnode, const std::unordered_set& queue_inputs, + SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes) { + // Proceed only if qnode is a queue or an Enter with queue input. + if (!IsQueue(*qnode) && !IsEnterWithQueue(*qnode)) { + return Status::OK(); + } + auto qctx = shape_refiner->GetContext(qnode); + if (!qctx) { + return Status::OK(); + } + auto* queue_handle_data = qctx->output_handle_shapes_and_types(0); + + // Merge all inputs into the enqueue node, regardless of which phase we + // are in. + std::vector queue_shapes_and_types; + if (queue_handle_data) { + queue_shapes_and_types = *queue_handle_data; + } + for (const auto& node : queue_inputs) { + auto ctx = shape_refiner->GetContext(node); + if (!ctx) { + continue; + } + // TODO(bsteiner): handle EnqueueMany as well. 
+ if (node->type_string().find("Enqueue") != std::string::npos && + node->type_string().find("EnqueueMany") == std::string::npos) { + std::vector shapes_and_types; + for (int i = 1; i < ctx->num_inputs(); ++i) { + shapes_and_types.push_back({ctx->input(i), node->input_type(i)}); + } + if (queue_shapes_and_types.empty()) { + queue_shapes_and_types = shapes_and_types; + } else { + if (relax) { + TF_RETURN_IF_ERROR(RelaxEnqueueShapesAndMergeTypes( + shape_refiner, qnode, shapes_and_types, &queue_shapes_and_types)); + } else { + TF_RETURN_IF_ERROR(MergeEnqueueShapesAndTypes( + shape_refiner, qnode, shapes_and_types, &queue_shapes_and_types)); + } + } + } + } + + if (queue_handle_data == nullptr || + !shape_refiner->EquivalentShapesAndTypes(*queue_handle_data, + queue_shapes_and_types)) { + qctx->set_output_handle_shapes_and_types(0, queue_shapes_and_types); + + new_shapes->push(qnode); + } + + return Status::OK(); +} + Status GraphProperties::InferStatically() { Graph graph(OpRegistry::Global()); FunctionLibraryDefinition function_library(graph.op_registry(), @@ -493,146 +811,35 @@ Status GraphProperties::InferStatically() { } if (node->IsEnter()) { enter_nodes.insert(node); - } else if (node->IsNextIteration()) { - for (const Node* output : node->out_nodes()) { - if (output->IsMerge()) { - merge_nodes.insert(output); - } - } + } else if (node->IsMerge()) { + merge_nodes.insert(node); } } - // Propagate the initial shapes of Enter nodes manually (the Enter shape - // function always forwards an UnknownShape). - std::queue new_shapes; - for (const Node* node : enter_nodes) { - TF_RETURN_IF_ERROR( - UpdateEnter(&shape_refiner, node, false /* relax */, &new_shapes)); - } - TF_RETURN_IF_ERROR( - PropagateShapes(&shape_refiner, false /* relax */, &new_shapes)); + SymbolicShapeRefiner refiner(&shape_refiner); // We propagate shapes through the graph in two phases. In the first phase, we - // exclusively merge shapes but we do not propagate shapes through loops. 
Then - // on the second phase, we exclusively relax shapes and propagate shapes - // through loops until reaching fixed point. + // exclusively merge shapes but we do not propagate shapes through the + // backedge of loops (i.e. the NextIteration node). Then on the second phase, + // we exclusively relax shapes and propagate shapes through loops until + // reaching fixed point. for (int relax = 0; relax < 2; relax++) { - // We don't update Merge nodes with the input of NextIteration nodes on the - // merge pass. So we do that at the beginning of the relax pass instead. - if (relax) { - bool updated = false; - for (const Node* node : merge_nodes) { - TF_RETURN_IF_ERROR( - shape_refiner.UpdateNode(node, false /* relax */, &updated)); - } + TopoQueue new_shapes; + // Force the propagation of shapes of Enter nodes manually (the Enter shape + // function always forwards an UnknownShape). + for (const Node* node : enter_nodes) { + TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes)); } - - bool done = true; - do { - if (relax) { - // Propagate shapes through any loops in the graph by relaxing. - for (const Node* node : merge_nodes) { - new_shapes.push(node); - } - TF_RETURN_IF_ERROR(PropagateShapes(&shape_refiner, relax, &new_shapes)); - } - - // If we found a resource, try to propagate the shapes through it. - new_shapes = std::queue(); - for (const auto& resource_data : resources) { - const Node* qnode = resource_data.first; - // Proceed only if qnode is a queue or an Enter with queue input. - if (!IsQueue(*qnode) && !IsEnterWithQueue(*qnode)) { - continue; - } - auto qctx = shape_refiner.GetContext(qnode); - if (!qctx) { - continue; - } - - // Check to see if the shape is fully defined. 
- auto* queue_handle_data = qctx->output_handle_shapes_and_types(0); - if (queue_handle_data != nullptr) { - bool fully_defined = true; - for (const auto& shape_and_type : *queue_handle_data) { - if (!qctx->FullyDefined(shape_and_type.shape) || - shape_and_type.dtype == DT_INVALID) { - fully_defined = false; - } - } - // If we are merging, then we are done. If we are relaxing, then we - // could potentially propagate a less specific shape. - if (fully_defined && !relax) { - continue; - } - } - - // Merge all inputs into the enqueue node, regardless of which phase we - // are in. - std::vector queue_shapes_and_types; - for (const auto& node : resource_data.second) { - auto ctx = shape_refiner.GetContext(node); - if (!ctx) { - continue; - } - // TODO(bsteiner): handle EnqueueMany as well. - if (node->type_string().find("Enqueue") != std::string::npos && - node->type_string().find("EnqueueMany") == std::string::npos) { - std::vector shapes_and_types; - for (int i = 1; i < ctx->num_inputs(); ++i) { - shapes_and_types.push_back({ctx->input(i), node->input_type(i)}); - } - - if (queue_shapes_and_types.empty()) { - queue_shapes_and_types = shapes_and_types; - } else { - TF_RETURN_IF_ERROR(MergeEnqueueShapesAndTypes( - shapes_and_types, qctx, &queue_shapes_and_types)); - } - } - } - // Combine the input shapes with the existing output shape. We either - // merge or relax depending on which phase we are in. - if (queue_handle_data != nullptr) { - if (relax) { - TF_RETURN_IF_ERROR(RelaxEnqueueShapesAndMergeTypes( - *queue_handle_data, qctx, &queue_shapes_and_types)); - } else { - TF_RETURN_IF_ERROR(MergeEnqueueShapesAndTypes( - *queue_handle_data, qctx, &queue_shapes_and_types)); - } - } - // Set the output ShapeAndType handles. If we successfully update the - // resource node, add its fan-out to the queue. 
- const std::vector* outputs = - qctx->output_handle_shapes_and_types(0); - std::vector existing_outputs; - if (outputs) { - existing_outputs = *outputs; - } - if (!queue_shapes_and_types.empty()) { - if (!relax && qctx->MergeOutputHandleShapesAndTypes( - 0, queue_shapes_and_types)) { - new_shapes.push(qnode); - } else if (relax && qctx->RelaxOutputHandleShapesAndMergeTypes( - 0, queue_shapes_and_types)) { - if (IsUpdatedShapesOrTypes( - qctx, existing_outputs, - *qctx->output_handle_shapes_and_types(0))) { - new_shapes.push(qnode); - } - } - } - } - // Propagate the shapes in the transitive fan-out of the queue. - done = new_shapes.empty(); - if (!done) { - TF_RETURN_IF_ERROR(PropagateShapes(&shape_refiner, relax, &new_shapes)); - } - } while (!done); + // Seed the propagation of shapes through merge nodes. + for (const Node* node : merge_nodes) { + TF_RETURN_IF_ERROR(UpdateShapes(&refiner, relax, node, &new_shapes)); + } + // Propagate shapes normally. + TF_RETURN_IF_ERROR( + PropagateShapes(&refiner, relax, &new_shapes, resources)); } - // Track shapes globally accross the graph. + // Track shapes globally across the graph. SymbolicShapeManager shape_manager; bool found_error = false; for (const Node* const node : graph.nodes()) { diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index a6aed0bba6..37c8654541 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -26,6 +26,9 @@ limitations under the License. namespace tensorflow { namespace grappler { +class SymbolicShapeRefiner; +class TopoQueue; + // A TensorFlow model to optimize. // Models are represented by the combination of a graph, one of more fetch // nodes, and potentially a set of nodes to feed. @@ -64,31 +67,42 @@ class GraphProperties { // Merges shapes , determined from an EnqueueV2 node, into // <*queue_shapes_and_types>. 
- Status MergeEnqueueShapesAndTypes( + static Status MergeEnqueueShapesAndTypes( + SymbolicShapeRefiner* shape_refiner, const Node* qnode, const std::vector& shapes_and_types, - shape_inference::InferenceContext* qctx, std::vector* queue_shapes_and_types); // Relaxes shapes , determined from an EnqueueV2 node, into // <*queue_shapes_and_types>. - Status RelaxEnqueueShapesAndMergeTypes( + static Status RelaxEnqueueShapesAndMergeTypes( + SymbolicShapeRefiner* shape_refiner, const Node* qnode, const std::vector& shapes_and_types, - shape_inference::InferenceContext* qctx, std::vector* queue_shapes_and_types); - // This gives access to private function of InferenceContext. - static void Relax(shape_inference::InferenceContext* c, - shape_inference::ShapeHandle s0, - shape_inference::ShapeHandle s1, - shape_inference::ShapeHandle* out); - - // These give access to private functions of ShapeRefiner. - static bool SameDefinedShape(shape_inference::InferenceContext* c, - shape_inference::ShapeHandle s0, - shape_inference::ShapeHandle s1); - static bool IsUpdatedShapesOrTypes( - shape_inference::InferenceContext* c, - const std::vector& existing, - const std::vector& updated); + // Update the shapes for qnode. If output shapes of qnode have changed, + // enqueue its fanout in 'new_shapes'. + static Status UpdateResource( + const Node* qnode, const std::unordered_set& queue_inputs, + SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes); + + // Update the output shapes of a Merge node, and enqueue its fanout in + // new_shapes if needed. + static Status UpdateMergeNode(SymbolicShapeRefiner* shape_refiner, + const Node* node, bool relax, + TopoQueue* new_shapes); + // Process the Enter node, and enqueue its fanout in new_shapes if needed. + static Status UpdateEnter(SymbolicShapeRefiner* shape_refiner, + const Node* node, bool relax, + TopoQueue* new_shapes); + // Update the shapes for node 'n'. 
If output shapes for n have changed, + // enqueue its fanout in 'new_shapes'. + static Status UpdateShapes(SymbolicShapeRefiner* shape_refiner, bool relax, + const Node* n, TopoQueue* new_shapes); + // Propagate the shapes for the nodes enqueued in new_shapes and their + // transitive fanout until a fixed point is reached. + static Status PropagateShapes( + SymbolicShapeRefiner* shape_refiner, bool relax, TopoQueue* new_shapes, + const std::unordered_map>& + resources); }; } // end namespace grappler diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index f785f627e1..74d48158a9 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -362,7 +362,7 @@ TEST_F(GraphPropertiesTest, WhileLoop) { /* with tf.Graph().as_default(): i0 = tf.constant(0) - m0 = tf.ones([2, 2]) + m0 = tf.placeholder([-1, 2]) c = lambda i, m: i < 10 b = lambda i, m: [i+1, tf.concat([m, m], axis=0)] r = tf.while_loop( @@ -387,6 +387,14 @@ TEST_F(GraphPropertiesTest, WhileLoop) { EXPECT_EQ(DT_FLOAT, prop.dtype()); EXPECT_EQ("float: [-1,2]", PropToString(prop)); } + + // The loop outputs batch dim should be different from the input batch dim + // since we concatenated along the batch dim. 
+ auto shape_in = properties.GetOutputProperties("ones").at(0).shape(); + auto shape_out = properties.GetOutputProperties("while/Exit_1").at(0).shape(); + EXPECT_GE(-2, shape_in.dim(0).size()); + EXPECT_GE(-2, shape_out.dim(0).size()); + EXPECT_NE(shape_in.dim(0).size(), shape_out.dim(0).size()); } TEST_F(GraphPropertiesTest, NestedLoop) { @@ -750,6 +758,10 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) { Output e = ops::Add(s.WithOpName("e"), c, d); Output f = ops::Add(s.WithOpName("f"), a, c); + Output zero = ops::Const(s.WithOpName("zero"), 0.0f, {}); + Output g = ops::Shape(s.WithOpName("g"), c); + Output h = ops::Fill(s.WithOpName("h"), g, zero); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); @@ -773,15 +785,20 @@ TEST_F(GraphPropertiesTest, SymbolicShapes) { EXPECT_EQ(shape_b.dim(0).size(), shape_d.dim(0).size()); const auto shape_e = properties.GetOutputProperties("e").at(0).shape(); - EXPECT_EQ(2, shape_e.dim_size()); + ASSERT_EQ(2, shape_e.dim_size()); EXPECT_EQ(shape_e.dim(0).size(), shape_c.dim(0).size()); EXPECT_NE(shape_e.dim(1).size(), shape_c.dim(1).size()); EXPECT_NE(shape_e.dim(0).size(), shape_d.dim(0).size()); const auto shape_f = properties.GetOutputProperties("f").at(0).shape(); - EXPECT_EQ(2, shape_f.dim_size()); + ASSERT_EQ(2, shape_f.dim_size()); EXPECT_EQ(shape_f.dim(0).size(), shape_a.dim(0).size()); EXPECT_EQ(shape_f.dim(1).size(), shape_a.dim(1).size()); + + const auto shape_h = properties.GetOutputProperties("h").at(0).shape(); + ASSERT_EQ(2, shape_f.dim_size()); + EXPECT_EQ(shape_h.dim(0).size(), shape_c.dim(0).size()); + EXPECT_EQ(shape_h.dim(1).size(), shape_c.dim(1).size()); } TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) { diff --git a/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt b/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt index c11833bd1a..fbc3659d9a 100644 --- a/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt +++ 
b/tensorflow/core/grappler/costs/graph_properties_testdata/while_loop.pbtxt @@ -21,7 +21,7 @@ node { } node { name: "ones" - op: "Const" + op: "PlaceholderV2" attr { key: "dtype" value { @@ -29,19 +29,15 @@ node { } } attr { - key: "value" + key: "shape" value { - tensor { - dtype: DT_FLOAT - tensor_shape { - dim { - size: 2 - } - dim { - size: 2 - } + shape { + dim { + size: -1 + } + dim { + size: 2 } - float_val: 1.0 } } } -- GitLab From 98ef53d5541049655c9160130595253fdefd4590 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Fri, 17 Nov 2017 12:49:48 -0800 Subject: [PATCH 0064/1225] Remove `tf.data.SparseType` and instead use `tf.data.Dataset.output_classes` as the means for recording the class type of the dataset elements. PiperOrigin-RevId: 176147440 --- .../contrib/data/python/ops/batching.py | 40 ++- .../contrib/data/python/ops/dataset_ops.py | 4 + .../contrib/data/python/ops/error_ops.py | 9 +- .../contrib/data/python/ops/grouping.py | 36 +- .../contrib/data/python/ops/interleave_ops.py | 22 +- tensorflow/contrib/data/python/ops/readers.py | 4 + .../contrib/data/python/ops/scan_ops.py | 23 +- tensorflow/python/data/__init__.py | 2 - tensorflow/python/data/ops/dataset_ops.py | 254 ++++++++++---- tensorflow/python/data/ops/iterator_ops.py | 93 +++-- tensorflow/python/data/ops/readers.py | 12 + tensorflow/python/data/util/BUILD | 3 + tensorflow/python/data/util/sparse.py | 150 ++++----- tensorflow/python/data/util/sparse_test.py | 318 ++++++++++++++---- tensorflow/python/kernel_tests/BUILD | 17 +- tensorflow/python/ops/sparse_ops.py | 2 +- .../api/golden/tensorflow.data.-dataset.pbtxt | 4 + ...ow.data.-fixed-length-record-dataset.pbtxt | 4 + .../golden/tensorflow.data.-iterator.pbtxt | 10 +- .../golden/tensorflow.data.-sparse-type.pbtxt | 13 - .../tensorflow.data.-t-f-record-dataset.pbtxt | 4 + .../tensorflow.data.-text-line-dataset.pbtxt | 4 + .../tools/api/golden/tensorflow.data.pbtxt | 4 - 23 files changed, 728 insertions(+), 304 deletions(-) delete mode 
100644 tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt diff --git a/tensorflow/contrib/data/python/ops/batching.py b/tensorflow/contrib/data/python/ops/batching.py index cc63baed81..1ac059b374 100644 --- a/tensorflow/contrib/data/python/ops/batching.py +++ b/tensorflow/contrib/data/python/ops/batching.py @@ -112,8 +112,10 @@ def filter_irregular_batches(batch_size): tensor_batch_size = ops.convert_to_tensor( batch_size, dtype=dtypes.int64, name="batch_size") - flattened = _RestructuredDataset(dataset, - tuple(nest.flatten(dataset.output_types))) + flattened = _RestructuredDataset( + dataset, + tuple(nest.flatten(dataset.output_types)), + output_classes=tuple(nest.flatten(dataset.output_classes))) def _predicate(*xs): """Return `True` if this element is a full batch.""" @@ -135,7 +137,11 @@ def filter_irregular_batches(batch_size): known_shapes = nest.map_structure(_set_first_dimension, dataset.output_shapes) - return _RestructuredDataset(filtered, dataset.output_types, known_shapes) + return _RestructuredDataset( + filtered, + dataset.output_types, + known_shapes, + output_classes=dataset.output_classes) return _apply_fn @@ -237,6 +243,10 @@ class DenseToSparseBatchDataset(dataset_ops.Dataset): output_shapes=self.output_shapes, output_types=self.output_types) + @property + def output_classes(self): + return (ops.Tensor, ops.Tensor, ops.Tensor) + @property def output_shapes(self): num_elements = tensor_shape.Dimension(None) @@ -252,7 +262,11 @@ class DenseToSparseBatchDataset(dataset_ops.Dataset): class _RestructuredDataset(dataset_ops.Dataset): """An internal helper for changing the structure and shape of a dataset.""" - def __init__(self, dataset, output_types, output_shapes=None): + def __init__(self, + dataset, + output_types, + output_shapes=None, + output_classes=None): """Creates a new dataset with the given output types and shapes. 
The given `dataset` must have a structure that is convertible: @@ -268,6 +282,8 @@ class _RestructuredDataset(dataset_ops.Dataset): output_types: A nested structure of `tf.DType` objects. output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects. If omitted, the shapes will be inherited from `dataset`. + output_classes: (Optional.) A nested structure of class types. + If omitted, the class types will be inherited from `dataset`. Raises: ValueError: If either `output_types` or `output_shapes` is not compatible @@ -307,10 +323,21 @@ class _RestructuredDataset(dataset_ops.Dataset): output_shapes)) self._output_shapes = nest.map_structure_up_to( output_types, tensor_shape.as_shape, output_shapes) + if output_classes is None: + # Inherit class types from the original `dataset`. + self._output_classes = nest.pack_sequence_as(output_types, + nest.flatten( + dataset.output_classes)) + else: + self._output_classes = output_classes def _as_variant_tensor(self): return self._dataset._as_variant_tensor() # pylint: disable=protected-access + @property + def output_classes(self): + return self._output_classes + @property def output_types(self): return self._output_types @@ -345,8 +372,9 @@ class _MapAndBatchDataset(dataset_ops.MapDataset): batch_size=self._batch_size, num_parallel_batches=self._num_parallel_batches, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) # pylint: enable=protected-access @property diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index 45d6dbe743..863c94ef9f 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -48,6 +48,10 @@ class Dataset(dataset_ops.Dataset): def 
_as_variant_tensor(self): return self._dataset._as_variant_tensor() # pylint: disable=protected-access + @property + def output_classes(self): + return self._dataset.output_classes + @property def output_shapes(self): return self._dataset.output_shapes diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py index 194b611513..aa629cba47 100644 --- a/tensorflow/contrib/data/python/ops/error_ops.py +++ b/tensorflow/contrib/data/python/ops/error_ops.py @@ -63,9 +63,14 @@ class IgnoreErrorsDataset(dataset_ops.Dataset): def _as_variant_tensor(self): return gen_dataset_ops.ignore_errors_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 86337271bc..ef91c56726 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -88,15 +88,21 @@ def group_by_window(key_func, class _VariantDataset(dataset_ops.Dataset): """A Dataset wrapper for a tf.variant-typed function argument.""" - def __init__(self, dataset_variant, output_types, output_shapes): + def __init__(self, dataset_variant, output_types, output_shapes, + output_classes): super(_VariantDataset, self).__init__() self._dataset_variant = dataset_variant self._output_types = output_types self._output_shapes = output_shapes + self._output_classes = output_classes def _as_variant_tensor(self): return self._dataset_variant + @property + def output_classes(self): + 
return self._output_classes + @property def output_shapes(self): return self._output_shapes @@ -138,17 +144,21 @@ class GroupByWindowDataset(dataset_ops.Dataset): def _make_key_func(self, key_func, input_dataset): """Make wrapping Defun for key_func.""" - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_key_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. - for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) # pylint: disable=protected-access if dataset_ops._should_unpack_args(nested_args): ret = key_func(*nested_args) @@ -170,14 +180,15 @@ class GroupByWindowDataset(dataset_ops.Dataset): def tf_reduce_func(key, window_dataset_variant): """A wrapper for Defun that facilitates shape inference.""" key.set_shape([]) - window_dataset = _VariantDataset(window_dataset_variant, - input_dataset.output_types, - input_dataset.output_shapes) + window_dataset = _VariantDataset( + window_dataset_variant, input_dataset.output_types, + input_dataset.output_shapes, input_dataset.output_classes) if not isinstance(window_dataset, dataset_ops.Dataset): raise TypeError("`window_dataset` must return a `Dataset` object.") output_dataset = reduce_func(key, window_dataset) if not isinstance(output_dataset, dataset_ops.Dataset): raise TypeError("`reduce_func` must return a `Dataset` object.") 
+ self._output_classes = output_dataset.output_classes self._output_types = output_dataset.output_types self._output_shapes = output_dataset.output_shapes return output_dataset._as_variant_tensor() # pylint: disable=protected-access @@ -185,6 +196,10 @@ class GroupByWindowDataset(dataset_ops.Dataset): self._reduce_func = tf_reduce_func self._reduce_func.add_to_graph(ops.get_default_graph()) + @property + def output_classes(self): + return self._output_classes + @property def output_shapes(self): return self._output_shapes @@ -203,5 +218,6 @@ class GroupByWindowDataset(dataset_ops.Dataset): reduce_func=self._reduce_func, window_size_func=self._window_size_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index 830642c040..53324e06e7 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -36,17 +36,21 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): super(ParallelInterleaveDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. 
- for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) if dataset_ops._should_unpack_args(nested_args): # pylint: disable=protected-access dataset = map_func(*nested_args) else: @@ -55,6 +59,7 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): if not isinstance(dataset, dataset_ops.Dataset): raise TypeError("`map_func` must return a `Dataset` object.") + self._output_classes = dataset.output_classes self._output_types = dataset.output_types self._output_shapes = dataset.output_shapes @@ -79,8 +84,13 @@ class ParallelInterleaveDataset(dataset_ops.Dataset): self._sloppy, f=self._map_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes @property def output_shapes(self): diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 632082b5f1..bb47832fe9 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -269,6 +269,10 @@ class _SqlDataset(dataset_ops.Dataset): nest.flatten(self.output_types), nest.flatten(self.output_shapes)) + @property + def output_classes(self): + return nest.map_structure(lambda _: ops.Tensor, self._output_types) + @property def output_shapes(self): return 
nest.map_structure(lambda _: tensor_shape.TensorShape([]), diff --git a/tensorflow/contrib/data/python/ops/scan_ops.py b/tensorflow/contrib/data/python/ops/scan_ops.py index 2cfc0709cd..7c595b1814 100644 --- a/tensorflow/contrib/data/python/ops/scan_ops.py +++ b/tensorflow/contrib/data/python/ops/scan_ops.py @@ -53,6 +53,7 @@ class _ScanDataset(dataset_ops.Dataset): [t.dtype for t in nest.flatten(self._initial_state)]) # Will be populated by calling `tf_scan_func`. + self._output_classes = None self._output_shapes = None self._output_types = None @@ -68,13 +69,16 @@ class _ScanDataset(dataset_ops.Dataset): flat_new_state_shapes = [] @function.Defun(*(flat_state_types + nest.flatten( - sparse.unwrap_sparse_types(input_dataset.output_types)))) + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes)))) # pylint: disable=protected-access def tf_scan_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the state and input_dataset. - for arg, shape in zip( - args, - flat_state_shapes + nest.flatten(input_dataset.output_shapes)): + # TODO(b/69424092): Check that neither inputs nor outputs are sparse. + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) # pylint: disable=protected-access + for arg, shape in zip(args, + flat_state_shapes + nest.flatten(dense_shapes)): arg.set_shape(shape) pivot = len(flat_state_shapes) @@ -108,6 +112,8 @@ class _ScanDataset(dataset_ops.Dataset): "state. Expected %s; got %s." 
% (self._state_types, nest.pack_sequence_as( self._state_types, [t.dtype for t in flat_new_state]))) + self._output_classes = nest.pack_sequence_as( + output_value, [ops.Tensor for _ in flat_output_value]) self._output_types = nest.pack_sequence_as( output_value, [t.dtype for t in flat_output_value]) @@ -147,8 +153,13 @@ class _ScanDataset(dataset_ops.Dataset): self._scan_func.captured_inputs, f=self._scan_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes @property def output_shapes(self): diff --git a/tensorflow/python/data/__init__.py b/tensorflow/python/data/__init__.py index 504500d245..239f9b0d59 100644 --- a/tensorflow/python/data/__init__.py +++ b/tensorflow/python/data/__init__.py @@ -21,7 +21,6 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. 
@@FixedLengthRecordDataset @@TextLineDataset @@TFRecordDataset -@@SparseType """ from __future__ import absolute_import @@ -34,7 +33,6 @@ from tensorflow.python.data.ops.iterator_ops import Iterator from tensorflow.python.data.ops.readers import FixedLengthRecordDataset from tensorflow.python.data.ops.readers import TextLineDataset from tensorflow.python.data.ops.readers import TFRecordDataset -from tensorflow.python.data.util.sparse import SparseType # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 5f981e2670..d434c8e522 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -97,13 +97,15 @@ class Dataset(object): container="", shared_name=shared_name, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) with ops.colocate_with(iterator_resource): initializer = gen_dataset_ops.make_iterator(self._as_variant_tensor(), iterator_resource) return iterator_ops.Iterator(iterator_resource, initializer, - self.output_types, self.output_shapes) + self.output_types, self.output_shapes, + self.output_classes) def make_one_shot_iterator(self): """Creates an `Iterator` for enumerating the elements of this dataset. 
@@ -144,9 +146,23 @@ class Dataset(object): gen_dataset_ops.one_shot_iterator( dataset_factory=_make_dataset, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)), None, - self.output_types, self.output_shapes) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, + self.output_classes))), None, + self.output_types, self.output_shapes, self.output_classes) + + @abc.abstractproperty + def output_classes(self): + """Returns the class of each component of an element of this dataset. + + The expected values are `tf.Tensor` and `tf.SparseTensor`. + + Returns: + A nested structure of Python `type` objects corresponding to each + component of an element of this dataset. + """ + raise NotImplementedError("Dataset.output_classes") @abc.abstractproperty def output_shapes(self): @@ -163,9 +179,8 @@ class Dataset(object): """Returns the type of each component of an element of this dataset. Returns: - A nested structure of `tf.DType` (or `tf.data.SparseType`) objects - corresponding to each `tf.Tensor` (or `tf.SparseTensor`) component of an - element of this dataset. + A nested structure of `tf.DType` objects corresponding to each component + of an element of this dataset. 
""" raise NotImplementedError("Dataset.output_types") @@ -882,7 +897,13 @@ class TensorDataset(Dataset): def _as_variant_tensor(self): return gen_dataset_ops.tensor_dataset( nest.flatten(self._tensors), - output_shapes=nest.flatten(self.output_shapes)) + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return nest.pack_sequence_as( + self._tensors, [ops.Tensor for _ in nest.flatten(self._tensors)]) @property def output_shapes(self): @@ -915,7 +936,13 @@ class TensorSliceDataset(Dataset): def _as_variant_tensor(self): return gen_dataset_ops.tensor_slice_dataset( nest.flatten(self._tensors), - output_shapes=nest.flatten(self.output_shapes)) + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return nest.pack_sequence_as( + self._tensors, [ops.Tensor for _ in nest.flatten(self._tensors)]) @property def output_shapes(self): @@ -945,6 +972,10 @@ class SparseTensorSliceDataset(Dataset): self._sparse_tensor.indices, self._sparse_tensor.values, self._sparse_tensor.dense_shape) + @property + def output_classes(self): + return (ops.Tensor, ops.Tensor, ops.Tensor) + @property def output_shapes(self): indices_shape = self._sparse_tensor.indices.get_shape() @@ -994,6 +1025,12 @@ class ZipDataset(Dataset): ]) # pylint: enable=protected-access + @property + def output_classes(self): + return nest.pack_sequence_as( + self._datasets, + [ds.output_classes for ds in nest.flatten(self._datasets)]) + @property def output_shapes(self): return nest.pack_sequence_as( @@ -1030,11 +1067,16 @@ class ConcatenateDataset(Dataset): return gen_dataset_ops.concatenate_dataset( self._input_dataset._as_variant_tensor(), self._dataset_to_concatenate._as_variant_tensor(), - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), 
output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) # pylint: enable=protected-access + @property + def output_classes(self): + return self._input_dataset.output_classes + @property def output_shapes(self): return nest.pack_sequence_as(self._input_dataset.output_shapes, [ @@ -1066,9 +1108,14 @@ class RepeatDataset(Dataset): return gen_dataset_ops.repeat_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access count=self._count, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1111,9 +1158,14 @@ class RangeDataset(Dataset): start=self._start, stop=self._stop, step=self._step, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return ops.Tensor @property def output_shapes(self): @@ -1138,9 +1190,14 @@ class CacheDataset(Dataset): return gen_dataset_ops.cache_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access filename=self._filename, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return 
self._input_dataset.output_classes @property def output_shapes(self): @@ -1186,9 +1243,14 @@ class ShuffleDataset(Dataset): seed=self._seed, seed2=self._seed2, reshuffle_each_iteration=self._reshuffle_each_iteration, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1212,9 +1274,14 @@ class TakeDataset(Dataset): return gen_dataset_ops.take_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access count=self._count, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1238,9 +1305,14 @@ class SkipDataset(Dataset): return gen_dataset_ops.skip_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access count=self._count, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1257,7 +1329,7 @@ class BatchDataset(Dataset): def __init__(self, input_dataset, batch_size): """See `Dataset.batch()` for details.""" super(BatchDataset, self).__init__() - 
if sparse.any_sparse(input_dataset.output_types): + if sparse.any_sparse(input_dataset.output_classes): # TODO(b/63669786): support batching of sparse tensors raise TypeError("Batching of sparse tensors is not currently supported") self._input_dataset = input_dataset @@ -1268,9 +1340,14 @@ class BatchDataset(Dataset): return gen_dataset_ops.batch_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access batch_size=self._batch_size, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1330,7 +1407,7 @@ class PaddedBatchDataset(Dataset): def __init__(self, input_dataset, batch_size, padded_shapes, padding_values): """See `Dataset.batch()` for details.""" super(PaddedBatchDataset, self).__init__() - if sparse.any_sparse(input_dataset.output_types): + if sparse.any_sparse(input_dataset.output_classes): # TODO(b/63669786): support batching of sparse tensors raise TypeError("Batching of sparse tensors is not currently supported") self._input_dataset = input_dataset @@ -1364,7 +1441,12 @@ class PaddedBatchDataset(Dataset): for s in nest.flatten(self._padded_shapes) ], padding_values=nest.flatten(self._padding_values), - output_shapes=nest.flatten(self.output_shapes)) + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1393,20 +1475,25 @@ class MapDataset(Dataset): super(MapDataset, self).__init__() self._input_dataset = input_dataset + self._output_classes = None self._output_shapes = None self._output_types = None - 
@function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. - for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) if _should_unpack_args(nested_args): ret = map_func(*nested_args) else: @@ -1425,16 +1512,17 @@ class MapDataset(Dataset): if isinstance(ret, list): ret = tuple(ret) - # Identify components that hold sparse tensor values. - types = sparse.get_sparse_types(ret) + self._output_classes = sparse.get_classes(ret) + self._output_shapes = nest.pack_sequence_as( + ret, [t.get_shape() for t in nest.flatten(ret)]) + self._output_types = nest.pack_sequence_as( + ret, [t.dtype for t in nest.flatten(ret)]) + # Serialize any sparse tensors and convert result to tensors. 
ret = nest.pack_sequence_as(ret, [ ops.convert_to_tensor(t) for t in nest.flatten(sparse.serialize_sparse_tensors(ret)) ]) - self._output_shapes = nest.pack_sequence_as( - types, [t.get_shape() for t in nest.flatten(ret)]) - self._output_types = sparse.wrap_sparse_types(ret, types) return nest.flatten(ret) self._map_func = tf_map_func @@ -1447,8 +1535,13 @@ class MapDataset(Dataset): self._map_func.captured_inputs, f=self._map_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes @property def output_shapes(self): @@ -1478,8 +1571,9 @@ class ParallelMapDataset(MapDataset): f=self._map_func, num_parallel_calls=self._num_parallel_calls, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) # pylint: enable=protected-access @@ -1491,17 +1585,21 @@ class FlatMapDataset(Dataset): super(FlatMapDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. 
- for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) if _should_unpack_args(nested_args): dataset = map_func(*nested_args) else: @@ -1510,6 +1608,7 @@ class FlatMapDataset(Dataset): if not isinstance(dataset, Dataset): raise TypeError("`map_func` must return a `Dataset` object.") + self._output_classes = dataset.output_classes self._output_types = dataset.output_types self._output_shapes = dataset.output_shapes @@ -1524,8 +1623,13 @@ class FlatMapDataset(Dataset): self._map_func.captured_inputs, f=self._map_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes @property def output_shapes(self): @@ -1545,17 +1649,21 @@ class InterleaveDataset(Dataset): super(InterleaveDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. 
- for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) if _should_unpack_args(nested_args): dataset = map_func(*nested_args) else: @@ -1564,6 +1672,7 @@ class InterleaveDataset(Dataset): if not isinstance(dataset, Dataset): raise TypeError("`map_func` must return a `Dataset` object.") + self._output_classes = dataset.output_classes self._output_types = dataset.output_types self._output_shapes = dataset.output_shapes @@ -1585,8 +1694,13 @@ class InterleaveDataset(Dataset): self._block_length, f=self._map_func, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes @property def output_shapes(self): @@ -1605,17 +1719,21 @@ class FilterDataset(Dataset): super(FilterDataset, self).__init__() self._input_dataset = input_dataset - @function.Defun( - *nest.flatten(sparse.unwrap_sparse_types(input_dataset.output_types))) + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) def tf_predicate(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. 
- for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types) + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) if _should_unpack_args(nested_args): ret = predicate(*nested_args) else: @@ -1637,8 +1755,13 @@ class FilterDataset(Dataset): other_arguments=self._predicate.captured_inputs, predicate=self._predicate, output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types)), - output_shapes=nest.flatten(self.output_shapes)) + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): @@ -1663,9 +1786,14 @@ class PrefetchDataset(Dataset): return gen_dataset_ops.prefetch_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access buffer_size=self._buffer_size, - output_shapes=nest.flatten(self.output_shapes), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), output_types=nest.flatten( - sparse.unwrap_sparse_types(self.output_types))) + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes @property def output_shapes(self): diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index 987a9b53ad..663bed07b2 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -29,7 
+29,7 @@ class Iterator(object): """Represents the state of iterating through a `Dataset`.""" def __init__(self, iterator_resource, initializer, output_types, - output_shapes): + output_shapes, output_classes): """Creates a new iterator from the given iterator resource. Note: Most users will not call this initializer directly, and will @@ -41,21 +41,27 @@ class Iterator(object): iterator. initializer: A `tf.Operation` that should be run to initialize this iterator. - output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`) - objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`) - component of an element of this dataset. + output_types: A nested structure of `tf.DType` objects corresponding to + each component of an element of this dataset. output_shapes: A nested structure of `tf.TensorShape` objects corresponding to each component of an element of this dataset. + output_classes: A nested structure of Python `type` object corresponding + to each + component of an element of this iterator. """ self._iterator_resource = iterator_resource self._initializer = initializer + self._output_classes = output_classes self._output_types = output_types self._output_shapes = output_shapes self._string_handle = gen_dataset_ops.iterator_to_string_handle( self._iterator_resource) @staticmethod - def from_structure(output_types, output_shapes=None, shared_name=None): + def from_structure(output_types, + output_shapes=None, + shared_name=None, + output_classes=None): """Creates a new, uninitialized `Iterator` with the given structure. This iterator-constructing method can be used to create an iterator that @@ -102,15 +108,17 @@ class Iterator(object): ``` Args: - output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`) - objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`) - component of an element of this dataset. 
+ output_types: A nested structure of `tf.DType` objects corresponding to + each component of an element of this dataset. output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects corresponding to each component of an element of this dataset. If omitted, each component will have an unconstrainted shape. shared_name: (Optional.) If non-empty, this iterator will be shared under the given name across multiple sessions that share the same devices (e.g. when using a remote server). + output_classes: (Optional.) A nested structure of Python `type` objects + corresponding to each component of an element of this iterator. If + omitted, each component is assumed to be of type `tf.Tensor`. Returns: An `Iterator`. @@ -126,18 +134,24 @@ class Iterator(object): else: output_shapes = nest.map_structure_up_to( output_types, tensor_shape.as_shape, output_shapes) + if output_classes is None: + output_classes = nest.map_structure(lambda _: ops.Tensor, output_types) nest.assert_same_structure(output_types, output_shapes) if shared_name is None: shared_name = "" iterator_resource = gen_dataset_ops.iterator( container="", shared_name=shared_name, - output_types=nest.flatten(sparse.unwrap_sparse_types(output_types)), + output_types=nest.flatten(output_types), output_shapes=nest.flatten(output_shapes)) - return Iterator(iterator_resource, None, output_types, output_shapes) + return Iterator(iterator_resource, None, output_types, output_shapes, + output_classes) @staticmethod - def from_string_handle(string_handle, output_types, output_shapes=None): + def from_string_handle(string_handle, + output_types, + output_shapes=None, + output_classes=None): """Creates a new, uninitialized `Iterator` based on the given handle. 
This method allows you to define a "feedable" iterator where you can choose @@ -170,12 +184,14 @@ class Iterator(object): Args: string_handle: A scalar `tf.Tensor` of type `tf.string` that evaluates to a handle produced by the `Iterator.string_handle()` method. - output_types: A nested structure of `tf.DType` (or `tf.data.SparseType`) - objects corresponding to each `tf.Tensor` (or `tf.SparseTensor`) - component of an element of this dataset. + output_types: A nested structure of `tf.DType` objects corresponding to + each component of an element of this dataset. output_shapes: (Optional.) A nested structure of `tf.TensorShape` objects corresponding to each component of an element of this dataset. If omitted, each component will have an unconstrainted shape. + output_classes: (Optional.) A nested structure of Python `type` objects + corresponding to each component of an element of this iterator. If + omitted, each component is assumed to be of type `tf.Tensor`. Returns: An `Iterator`. @@ -187,13 +203,16 @@ class Iterator(object): else: output_shapes = nest.map_structure_up_to( output_types, tensor_shape.as_shape, output_shapes) + if output_classes is None: + output_classes = nest.map_structure(lambda _: ops.Tensor, output_types) nest.assert_same_structure(output_types, output_shapes) string_handle = ops.convert_to_tensor(string_handle, dtype=dtypes.string) iterator_resource = gen_dataset_ops.iterator_from_string_handle( string_handle, - output_types=nest.flatten(sparse.unwrap_sparse_types(output_types)), + output_types=nest.flatten(output_types), output_shapes=nest.flatten(output_shapes)) - return Iterator(iterator_resource, None, output_types, output_shapes) + return Iterator(iterator_resource, None, output_types, output_shapes, + output_classes) @property def initializer(self): @@ -230,6 +249,13 @@ class Iterator(object): with ops.name_scope(name, "make_initializer") as name: nest.assert_same_structure(self._output_types, dataset.output_types) 
nest.assert_same_structure(self._output_shapes, dataset.output_shapes) + for iterator_class, dataset_class in zip( + nest.flatten(self._output_classes), + nest.flatten(dataset.output_classes)): + if iterator_class is not dataset_class: + raise TypeError( + "Expected output classes %r but got dataset with output class %r." + % (self._output_classes, dataset.output_classes)) for iterator_dtype, dataset_dtype in zip( nest.flatten(self._output_types), nest.flatten(dataset.output_types)): if iterator_dtype != dataset_dtype: @@ -237,8 +263,8 @@ class Iterator(object): "Expected output types %r but got dataset with output types %r." % (self._output_types, dataset.output_types)) for iterator_shape, dataset_shape in zip( - nest.flatten(self._output_shapes), - nest.flatten(dataset.output_shapes)): + nest.flatten(self._output_shapes), nest.flatten( + dataset.output_shapes)): if not iterator_shape.is_compatible_with(dataset_shape): raise TypeError("Expected output shapes compatible with %r but got " "dataset with output shapes %r." % @@ -261,11 +287,15 @@ class Iterator(object): gen_dataset_ops.iterator_get_next( self._iterator_resource, output_types=nest.flatten( - sparse.unwrap_sparse_types( - self._output_types)), + sparse.as_dense_types( + self._output_types, + self._output_classes)), output_shapes=nest.flatten( - self._output_shapes), - name=name)), self._output_types) + sparse.as_dense_shapes( + self._output_shapes, + self._output_classes)), + name=name)), self._output_types, + self._output_shapes, self._output_classes) def string_handle(self, name=None): """Returns a string-valued `tf.Tensor` that represents this iterator. @@ -282,13 +312,25 @@ class Iterator(object): return gen_dataset_ops.iterator_to_string_handle( self._iterator_resource, name=name) + @property + def output_classes(self): + """Returns the class of each component of an element of this iterator. + + The expected values are `tf.Tensor` and `tf.SparseTensor`. 
+ + Returns: + A nested structure of Python `type` objects corresponding to each + component of an element of this dataset. + """ + return self._output_classes + @property def output_shapes(self): """Returns the shape of each component of an element of this iterator. Returns: A nested structure of `tf.TensorShape` objects corresponding to each - component of an element of this iterator. + component of an element of this dataset. """ return self._output_shapes @@ -297,8 +339,7 @@ class Iterator(object): """Returns the type of each component of an element of this iterator. Returns: - A nested structure of `tf.DType` (or `tf.data.SparseType`) objects - corresponding to each `tf.Tensor` (or `tf.SparseTensor`) component of an - element of this dataset. + A nested structure of `tf.DType` objects corresponding to each component + of an element of this dataset. """ return self._output_types diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py index 531716581f..c6fb8531ae 100644 --- a/tensorflow/python/data/ops/readers.py +++ b/tensorflow/python/data/ops/readers.py @@ -70,6 +70,10 @@ class TextLineDataset(Dataset): return gen_dataset_ops.text_line_dataset( self._filenames, self._compression_type, self._buffer_size) + @property + def output_classes(self): + return ops.Tensor + @property def output_shapes(self): return tensor_shape.scalar() @@ -110,6 +114,10 @@ class TFRecordDataset(Dataset): return gen_dataset_ops.tf_record_dataset( self._filenames, self._compression_type, self._buffer_size) + @property + def output_classes(self): + return ops.Tensor + @property def output_shapes(self): return tensor_shape.TensorShape([]) @@ -159,6 +167,10 @@ class FixedLengthRecordDataset(Dataset): self._filenames, self._header_bytes, self._record_bytes, self._footer_bytes, self._buffer_size) + @property + def output_classes(self): + return ops.Tensor + @property def output_shapes(self): return tensor_shape.scalar() diff --git 
a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD index 41d8513b16..f7d7fe98d3 100644 --- a/tensorflow/python/data/util/BUILD +++ b/tensorflow/python/data/util/BUILD @@ -38,8 +38,10 @@ py_library( deps = [ ":nest", "//tensorflow/python:dtypes", + "//tensorflow/python:ops", "//tensorflow/python:sparse_ops", "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", "//tensorflow/python:util", "@six_archive//:six", ], @@ -56,6 +58,7 @@ py_test( "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", ], ) diff --git a/tensorflow/python/data/util/sparse.py b/tensorflow/python/data/util/sparse.py index 673fac095c..4d25f6a963 100644 --- a/tensorflow/python/data/util/sparse.py +++ b/tensorflow/python/data/util/sparse.py @@ -19,29 +19,70 @@ from __future__ import print_function from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import sparse_ops -def any_sparse(types): - """Checks for sparse tensor types. +def any_sparse(classes): + """Checks for sparse tensor. Args: - types: a structure with tensor types. + classes: a structure of objects that identify the dataset item classes Returns: - `True` if `types` contains a sparse tensor type and `False` otherwise. + `True` if `classes` contains a sparse tensor type and `False` otherwise. """ - return any([isinstance(ty, SparseType) for ty in nest.flatten(types)]) + return any([c is sparse_tensor.SparseTensor for c in nest.flatten(classes)]) -def deserialize_sparse_tensors(tensors, types): +def as_dense_shapes(shapes, classes): + """Converts sparse tensor shapes to their physical shapes. + + Args: + shapes: a structure of shapes to convert. 
+ classes: a structure of objects that identify the dataset item classes + + Returns: + a structure matching the nested structure of `shapes`, containing + `tensor_shape.unknown_shape()` at positions where `classes` contains + `tf.SparseTensor` and matching contents of `shapes` otherwise + """ + ret = nest.pack_sequence_as(shapes, [ + tensor_shape.unknown_shape() if c is sparse_tensor.SparseTensor else shape + for shape, c in zip(nest.flatten(shapes), nest.flatten(classes)) + ]) + return ret + + +def as_dense_types(types, classes): + """Converts sparse tensor types to `dtypes.string`. + + Args: + types: a structure of types to convert. + classes: a structure of objects that identify the dataset item classes + + Returns: + a structure matching the nested structure of `types`, containing + `dtypes.string` at positions where `classes` contains `tf.SparseTensor` and + matching contents of `types` otherwise + """ + ret = nest.pack_sequence_as(types, [ + dtypes.string if c is sparse_tensor.SparseTensor else ty + for ty, c in zip(nest.flatten(types), nest.flatten(classes)) + ]) + return ret + + +def deserialize_sparse_tensors(tensors, types, shapes, classes): """Deserializes sparse tensors. Args: tensors: a structure of tensors to deserialize. 
- types: a structure object the holds information about which tensors in - `tensors` represent serialized sparse tensors + types: a structure that holds information about types of `tensors` + shapes: a structure that holds information about shapes of `tensors` + classes: a structure of objects that identify the dataset item classes Returns: `tensors` with any serialized sparse tensors replaced by their deserialized @@ -49,27 +90,29 @@ def deserialize_sparse_tensors(tensors, types): """ # TODO(b/63669786): support batching of sparse tensors ret = nest.pack_sequence_as(types, [ - sparse_ops.deserialize_sparse(tensor, ty.dtype) - if isinstance(ty, SparseType) else tensor - for (tensor, ty) in zip(nest.flatten(tensors), nest.flatten(types)) + sparse_ops.deserialize_sparse(tensor, dtype=ty, rank=shape.ndims) + if c is sparse_tensor.SparseTensor else tensor + for (tensor, ty, shape, c) in zip( + nest.flatten(tensors), nest.flatten(types), nest.flatten(shapes), + nest.flatten(classes)) ]) return ret -def get_sparse_types(tensors): - """Gets sparse types for a structure of tensors. +def get_classes(tensors): + """Gets classes for a structure of tensors. Args: - tensors: the tensor structure to get sparse types for. + tensors: the tensor structure to get classes for. 
Returns: a structure matching the nested structure of `tensors`, containing - `SparseType` at positions where `tensors` contains a sparse tensor and - `None` otherwise + `tf.SparseTensor` at positions where `tensors` contains a sparse tensor and + `tf.Tensor` otherwise """ return nest.pack_sequence_as(tensors, [ - SparseType(tensor.dtype) - if isinstance(tensor, sparse_tensor.SparseTensor) else None + sparse_tensor.SparseTensor + if isinstance(tensor, sparse_tensor.SparseTensor) else ops.Tensor for tensor in nest.flatten(tensors) ]) @@ -90,74 +133,3 @@ def serialize_sparse_tensors(tensors): for tensor in nest.flatten(tensors) ]) return ret - - -def unwrap_sparse_types(types): - """Unwraps sparse tensor types as `dtypes.string`. - - Args: - types: a structure of types to unwrap. - - Returns: - a structure matching the nested structure of `types`, containing - `dtypes.string` at positions where `types` contains a sparse tensor and - matching contents of `types` otherwise - """ - ret = nest.pack_sequence_as(types, [ - dtypes.string if isinstance(ty, SparseType) else ty - for ty in nest.flatten(types) - ]) - return ret - - -def wrap_sparse_types(tensors, types): - """Wraps sparse tensor types in `SparseType`. - - Args: - tensors: a structure of tensors for which to wrap types. - types: a structure that holds information about which tensors in - `tensors` represent serialized sparse tensors - - Returns: - a structure matching the nested structure of `tensors`, containing - `SparseType` at positions where `tensors` contains a sparse tensor and - `DType` otherwise - """ - ret = nest.pack_sequence_as(types, [ - tensor.dtype if ty is None else ty - for tensor, ty in zip(nest.flatten(tensors), nest.flatten(types)) - ]) - return ret - - -class SparseType(object): - """Wrapper class for representing types of sparse tensors in tf.data.""" - - def __init__(self, dtype): - """Creates a new instace of `SparseType`. - - Args: - dtype: the sparse tensor type to wrap. 
- """ - self._dtype = dtype - - def __repr__(self): - return "SparseType({0!r})".format(self._dtype) - - def __eq__(self, other): - """Returns `True` iff `self == other`.""" - if not isinstance(other, SparseType): - return False - return self._dtype == other.dtype - - def __ne__(self, other): - """Returns `True` iff `self != other`.""" - return not self.__eq__(other) - - def __hash__(self): - return self._dtype.__hash__() - - @property - def dtype(self): - """Returns the wrapped sparse tensor type.""" - return self._dtype diff --git a/tensorflow/python/data/util/sparse_test.py b/tensorflow/python/data/util/sparse_test.py index e30ed639c2..a707570bab 100644 --- a/tensorflow/python/data/util/sparse_test.py +++ b/tensorflow/python/data/util/sparse_test.py @@ -22,7 +22,9 @@ from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.platform import test @@ -30,17 +32,200 @@ class SparseTest(test.TestCase): def testAnySparse(self): test_cases = ( - ((), False), - ((None), False), - ((dtypes.string), False), - ((None, -1, dtypes.string), False), - ((sparse.SparseType(dtypes.string)), True), - ((None, sparse.SparseType(dtypes.string)), True), - ((sparse.SparseType(dtypes.string), dtypes.string), True), - ((((sparse.SparseType(dtypes.string)))), True) + { + "classes": (), + "expected": False + }, + { + "classes": (ops.Tensor), + "expected": False + }, + { + "classes": (((ops.Tensor))), + "expected": False + }, + { + "classes": (ops.Tensor, ops.Tensor), + "expected": False + }, + { + "classes": (ops.Tensor, sparse_tensor.SparseTensor), + "expected": True + }, + { + "classes": (sparse_tensor.SparseTensor, sparse_tensor.SparseTensor), + "expected": + True + }, + { + 
"classes": (sparse_tensor.SparseTensor, ops.Tensor), + "expected": True + }, + { + "classes": (((sparse_tensor.SparseTensor))), + "expected": True + }, ) for test_case in test_cases: - self.assertEqual(sparse.any_sparse(test_case[0]), test_case[1]) + self.assertEqual( + sparse.any_sparse(test_case["classes"]), test_case["expected"]) + + def assertShapesEqual(self, a, b): + for a, b in zip(nest.flatten(a), nest.flatten(b)): + self.assertEqual(a.ndims, b.ndims) + if a.ndims is None: + continue + for c, d in zip(a.as_list(), b.as_list()): + self.assertEqual(c, d) + + def testAsDenseShapes(self): + test_cases = ( + { + "types": (), + "classes": (), + "expected": () + }, + { + "types": tensor_shape.scalar(), + "classes": ops.Tensor, + "expected": tensor_shape.scalar() + }, + { + "types": tensor_shape.scalar(), + "classes": sparse_tensor.SparseTensor, + "expected": tensor_shape.unknown_shape() + }, + { + "types": (tensor_shape.scalar()), + "classes": (ops.Tensor), + "expected": (tensor_shape.scalar()) + }, + { + "types": (tensor_shape.scalar()), + "classes": (sparse_tensor.SparseTensor), + "expected": (tensor_shape.unknown_shape()) + }, + { + "types": (tensor_shape.scalar(), ()), + "classes": (ops.Tensor, ()), + "expected": (tensor_shape.scalar(), ()) + }, + { + "types": ((), tensor_shape.scalar()), + "classes": ((), ops.Tensor), + "expected": ((), tensor_shape.scalar()) + }, + { + "types": (tensor_shape.scalar(), ()), + "classes": (sparse_tensor.SparseTensor, ()), + "expected": (tensor_shape.unknown_shape(), ()) + }, + { + "types": ((), tensor_shape.scalar()), + "classes": ((), sparse_tensor.SparseTensor), + "expected": ((), tensor_shape.unknown_shape()) + }, + { + "types": (tensor_shape.scalar(), (), tensor_shape.scalar()), + "classes": (ops.Tensor, (), ops.Tensor), + "expected": (tensor_shape.scalar(), (), tensor_shape.scalar()) + }, + { + "types": (tensor_shape.scalar(), (), tensor_shape.scalar()), + "classes": (sparse_tensor.SparseTensor, (), + 
sparse_tensor.SparseTensor), + "expected": (tensor_shape.unknown_shape(), (), + tensor_shape.unknown_shape()) + }, + { + "types": ((), tensor_shape.scalar(), ()), + "classes": ((), ops.Tensor, ()), + "expected": ((), tensor_shape.scalar(), ()) + }, + { + "types": ((), tensor_shape.scalar(), ()), + "classes": ((), sparse_tensor.SparseTensor, ()), + "expected": ((), tensor_shape.unknown_shape(), ()) + }, + ) + for test_case in test_cases: + self.assertShapesEqual( + sparse.as_dense_shapes(test_case["types"], test_case["classes"]), + test_case["expected"]) + + def testAsDenseTypes(self): + test_cases = ( + { + "types": (), + "classes": (), + "expected": () + }, + { + "types": dtypes.int32, + "classes": ops.Tensor, + "expected": dtypes.int32 + }, + { + "types": dtypes.int32, + "classes": sparse_tensor.SparseTensor, + "expected": dtypes.string + }, + { + "types": (dtypes.int32), + "classes": (ops.Tensor), + "expected": (dtypes.int32) + }, + { + "types": (dtypes.int32), + "classes": (sparse_tensor.SparseTensor), + "expected": (dtypes.string) + }, + { + "types": (dtypes.int32, ()), + "classes": (ops.Tensor, ()), + "expected": (dtypes.int32, ()) + }, + { + "types": ((), dtypes.int32), + "classes": ((), ops.Tensor), + "expected": ((), dtypes.int32) + }, + { + "types": (dtypes.int32, ()), + "classes": (sparse_tensor.SparseTensor, ()), + "expected": (dtypes.string, ()) + }, + { + "types": ((), dtypes.int32), + "classes": ((), sparse_tensor.SparseTensor), + "expected": ((), dtypes.string) + }, + { + "types": (dtypes.int32, (), dtypes.int32), + "classes": (ops.Tensor, (), ops.Tensor), + "expected": (dtypes.int32, (), dtypes.int32) + }, + { + "types": (dtypes.int32, (), dtypes.int32), + "classes": (sparse_tensor.SparseTensor, (), + sparse_tensor.SparseTensor), + "expected": (dtypes.string, (), dtypes.string) + }, + { + "types": ((), dtypes.int32, ()), + "classes": ((), ops.Tensor, ()), + "expected": ((), dtypes.int32, ()) + }, + { + "types": ((), dtypes.int32, ()), + "classes": 
((), sparse_tensor.SparseTensor, ()), + "expected": ((), dtypes.string, ()) + }, + ) + for test_case in test_cases: + self.assertEqual( + sparse.as_dense_types(test_case["types"], test_case["classes"]), + test_case["expected"]) def assertSparseValuesEqual(self, a, b): if not isinstance(a, sparse_tensor.SparseTensor): @@ -70,71 +255,74 @@ class SparseTest(test.TestCase): indices=[[0, 0]], values=[1], dense_shape=[1, 1])), ) for expected in test_cases: + classes = sparse.get_classes(expected) + shapes = nest.map_structure(lambda _: tensor_shape.TensorShape(None), + classes) + types = nest.map_structure(lambda _: dtypes.int32, classes) actual = sparse.deserialize_sparse_tensors( - sparse.serialize_sparse_tensors(expected), - sparse.get_sparse_types(expected)) + sparse.serialize_sparse_tensors(expected), types, shapes, + sparse.get_classes(expected)) nest.assert_same_structure(expected, actual) for a, e in zip(nest.flatten(actual), nest.flatten(expected)): self.assertSparseValuesEqual(a, e) - def testGetSparseTypes(self): - s = sparse_tensor.SparseTensor( - indices=[[0, 0]], values=[1], dense_shape=[1, 1]) - t = sparse.SparseType(dtypes.int32) - test_cases = ( - ((), ()), - (s, t), - ((s), (t)), - ((s, ()), (t, ())), - (((), s), ((), t)), - ) - for test_case in test_cases: - self.assertEqual(sparse.get_sparse_types(test_case[0]), test_case[1]) - - def testWrapSparseTypes(self): - c = constant_op.constant([1]) - d = dtypes.int32 - s = sparse_tensor.SparseTensor( - indices=[[0, 0]], values=[1], dense_shape=[1, 1]) - t = sparse.SparseType(dtypes.int32) + def testGetClasses(self): + s = sparse_tensor.SparseTensor(indices=[[0]], values=[1], dense_shape=[1]) + d = ops.Tensor + t = sparse_tensor.SparseTensor test_cases = ( - ((), ()), - (s, t), - (c, d), - ((s), (t)), - ((c), (d)), - ((s, ()), (t, ())), - (((), s), ((), t)), - ((c, ()), (d, ())), - (((), c), ((), d)), - ((s, (), c), (t, (), d)), - (((), s, ()), ((), t, ())), - (((), c, ()), ((), d, ())), + { + "classes": (), 
+ "expected": () + }, + { + "classes": s, + "expected": t + }, + { + "classes": constant_op.constant([1]), + "expected": d + }, + { + "classes": (s), + "expected": (t) + }, + { + "classes": (constant_op.constant([1])), + "expected": (d) + }, + { + "classes": (s, ()), + "expected": (t, ()) + }, + { + "classes": ((), s), + "expected": ((), t) + }, + { + "classes": (constant_op.constant([1]), ()), + "expected": (d, ()) + }, + { + "classes": ((), constant_op.constant([1])), + "expected": ((), d) + }, + { + "classes": (s, (), constant_op.constant([1])), + "expected": (t, (), d) + }, + { + "classes": ((), s, ()), + "expected": ((), t, ()) + }, + { + "classes": ((), constant_op.constant([1]), ()), + "expected": ((), d, ()) + }, ) for test_case in test_cases: self.assertEqual( - sparse.wrap_sparse_types(test_case[0], sparse.get_sparse_types( - test_case[0])), test_case[1]) - - def testUnwrapSparseTypes(self): - d = dtypes.string - t = sparse.SparseType(dtypes.int32) - test_cases = ( - ((), ()), - (t, d), - (d, d), - ((t), (d)), - ((d), (d)), - ((t, ()), (d, ())), - (((), t), ((), d)), - ((d, ()), (d, ())), - (((), d), ((), d)), - ((t, (), d), (d, (), d)), - (((), t, ()), ((), d, ())), - (((), d, ()), ((), d, ())), - ) - for test_case in test_cases: - self.assertEqual(sparse.unwrap_sparse_types(test_case[0]), test_case[1]) + sparse.get_classes(test_case["classes"]), test_case["expected"]) if __name__ == "__main__": diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index d3fa5cb778..7643cf2ddc 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2807,11 +2807,14 @@ tf_py_test( "//tensorflow/python:errors", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", + "//tensorflow/python:random_ops", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:session", "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", 
"//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", ], tags = [ "manual", @@ -2825,18 +2828,12 @@ tf_py_test( srcs = ["dataset_from_generator_op_test.py"], additional_deps = [ "//third_party/py/numpy", - "//tensorflow/core:protos_all_py", - "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:session", - "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:sparse", ], ) @@ -3082,6 +3079,7 @@ tf_py_test( "//tensorflow/core:protos_all_py", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/util:sparse", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -3096,8 +3094,11 @@ tf_py_test( "//tensorflow/python:io_ops", "//tensorflow/python:math_ops", "//tensorflow/python:parsing_ops", + "//tensorflow/python:random_ops", "//tensorflow/python:script_ops", "//tensorflow/python:session", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", "//tensorflow/python:training", ], ) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 2ef6a0015b..3d6f942dca 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -1442,7 +1442,7 @@ def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None): Must have 3 columns. dtype: The `dtype` of the serialized `SparseTensor` object. rank: (optional) Python int, the rank of the `SparseTensor` object. 
- name: A name prefix for the returned tensors (optional) + name: A name prefix for the returned tensors (optional). Returns: A `SparseTensor` representing the deserialized `SparseTensor` object. diff --git a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt index d12514fe77..42de5c0c80 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt @@ -2,6 +2,10 @@ path: "tensorflow.data.Dataset" tf_class { is_instance: "" is_instance: "" + member { + name: "output_classes" + mtype: "" + } member { name: "output_shapes" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt index 002d0c6a9f..e2fc8d6cb1 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt @@ -3,6 +3,10 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "output_classes" + mtype: "" + } member { name: "output_shapes" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt index e62f6b247a..1f9aeb6ad6 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-iterator.pbtxt @@ -6,6 +6,10 @@ tf_class { name: "initializer" mtype: "" } + member { + name: "output_classes" + mtype: "" + } member { name: "output_shapes" mtype: "" @@ -16,15 +20,15 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'iterator_resource\', \'initializer\', \'output_types\', \'output_shapes\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'iterator_resource\', \'initializer\', \'output_types\', 
\'output_shapes\', \'output_classes\'], varargs=None, keywords=None, defaults=None" } member_method { name: "from_string_handle" - argspec: "args=[\'string_handle\', \'output_types\', \'output_shapes\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'string_handle\', \'output_types\', \'output_shapes\', \'output_classes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } member_method { name: "from_structure" - argspec: "args=[\'output_types\', \'output_shapes\', \'shared_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'output_types\', \'output_shapes\', \'shared_name\', \'output_classes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "get_next" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt deleted file mode 100644 index b25f9a029f..0000000000 --- a/tensorflow/tools/api/golden/tensorflow.data.-sparse-type.pbtxt +++ /dev/null @@ -1,13 +0,0 @@ -path: "tensorflow.data.SparseType" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "dtype" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'dtype\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt index 2b476dab66..9770389e5e 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt @@ -3,6 +3,10 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "output_classes" + mtype: "" + } member { name: "output_shapes" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt index c4c5ac0775..7263230c1c 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt @@ -3,6 +3,10 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "output_classes" + mtype: "" + } member { name: "output_shapes" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.pbtxt index b9f54a4d72..56fb270a49 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.pbtxt @@ -12,10 +12,6 @@ tf_module { name: "Iterator" mtype: "" } - member { - name: "SparseType" - mtype: "" - } member { name: "TFRecordDataset" mtype: "" -- GitLab From cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Fri, 17 Nov 2017 13:05:02 -0800 Subject: [PATCH 0065/1225] Add field to HLO proto field to enable reversing a convolution filter. 
PiperOrigin-RevId: 176149369 --- .../xla/client/computation_builder.cc | 1 + .../compiler/xla/service/hlo_evaluator.cc | 4 +- .../xla/service/hlo_evaluator_test.cc | 77 +++++++++++++++++++ tensorflow/compiler/xla/window_util.cc | 3 + tensorflow/compiler/xla/xla_data.proto | 4 + 5 files changed, 88 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 763d94e94c..b1f4ea8ab6 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -153,6 +153,7 @@ bool ComputationBuilder::MakeWindow( } else { dim->set_window_dilation(1); } + dim->set_window_reversal(false); } return true; } diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index a722d1b3d9..2bd9723dbe 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -814,7 +814,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { } rhs_index[dnums.kernel_spatial_dimensions(ki)] = - rhs_spatial_index[ki]; + window_dim.window_reversal() + ? 
((window_dim.size() - 1) - rhs_spatial_index[ki]) + : rhs_spatial_index[ki]; } result_val += lhs_literal.Get(lhs_index) * diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 85477af6fe..94929dda6a 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -794,6 +794,83 @@ TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) { LiteralTestUtil::ExpectEqual(*expected, *result); } +TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) { + HloComputation::Builder b(TestName()); + + // clang-format off + // Input dimensions: [feature=2, height=3, batch=1, width=4] + Array4D input({ + {{{1, 2, 3, 4}}, + {{5, 6, 7, 8}}, + {{9, 10, 11, 12}}}, + {{{13, 14, 15, 16}}, + {{17, 18, 19, 20}}, + {{21, 22, 23, 24}}} + }); + // Weight dimensions: + // [kernel_output_feature=1, width=3, kernel_input_feature=2, height=3] + Array4D weight({{ + {{1, 7, 13}, + {4, 10, 16}}, + {{2, 8, 14}, + {5, 11, 17}}, + {{3, 9, 15}, + {6, 12, 18}} + }}); + // clang-format on + + auto lhs_literal = Literal::CreateR4FromArray4D(input); + HloInstruction* lhs_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs_literal))); + + auto rhs_literal = Literal::CreateR4FromArray4D(weight); + HloInstruction* rhs_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal))); + rhs_instruction = b.AddInstruction(HloInstruction::CreateReverse( + rhs_instruction->shape(), rhs_instruction, {3, 1})); + + Window window; + WindowDimension dim; + dim.set_size(3); + dim.set_stride(1); + dim.set_padding_low(0); + dim.set_padding_high(0); + dim.set_window_dilation(1); + dim.set_base_dilation(1); + dim.set_window_reversal(true); + *window.add_dimensions() = dim; + *window.add_dimensions() = dim; + + ConvolutionDimensionNumbers dnums; + dnums.set_input_batch_dimension(2); + dnums.set_output_batch_dimension(2); + 
dnums.set_input_feature_dimension(0); + dnums.set_output_feature_dimension(0); + dnums.add_spatial_dimensions(1); + dnums.add_spatial_dimensions(3); + + dnums.set_kernel_output_feature_dimension(0); + dnums.set_kernel_input_feature_dimension(2); + dnums.add_kernel_spatial_dimensions(3); + dnums.add_kernel_spatial_dimensions(1); + + const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); + b.AddInstruction(HloInstruction::CreateConvolve( + shape, lhs_instruction, rhs_instruction, window, dnums)); + auto computation = module().AddEntryComputation(b.Build()); + + std::unique_ptr result = + evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie(); + + // clang-format off + // Result dimensions: [feature=1, height=1, batch=1, width=2] + Array4D expected_array({{{{2514, 2685}}}}); + // clang-format on + auto expected = Literal::CreateR4FromArray4D(expected_array); + + LiteralTestUtil::ExpectEqual(*expected, *result); +} + TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) { HloComputation::Builder b(TestName()); diff --git a/tensorflow/compiler/xla/window_util.cc b/tensorflow/compiler/xla/window_util.cc index 6f7f1479b9..2e0eba8de0 100644 --- a/tensorflow/compiler/xla/window_util.cc +++ b/tensorflow/compiler/xla/window_util.cc @@ -44,6 +44,9 @@ namespace window_util { if (dim.window_dilation() != 1) { StrAppend(&str, ",window_dilation=", dim.window_dilation()); } + if (dim.window_reversal()) { + StrAppend(&str, ",window_reversal"); + } StrAppend(&str, ")"); return str; } diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index eac8f2ff07..39f5806739 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -357,6 +357,10 @@ message WindowDimension { // means no dilation. base_dilation - 1 no-op entries ("holes") are implicitly // placed between each base area element. See documentation for convolution. 
int64 base_dilation = 6; + + // Window reversal means that this dimension was logically reversed before the + // operation. + bool window_reversal = 7; } // Describes the windowing in an operation such as convolution. -- GitLab From 704d66d66508d10bd12f39d2f99de4eb8c8ad7b0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 13:11:10 -0800 Subject: [PATCH 0066/1225] Temporarily disable tsan for wals_test. PiperOrigin-RevId: 176150090 --- tensorflow/contrib/factorization/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index fe86a20ab1..29a0a4221a 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -270,6 +270,7 @@ tf_py_test( "manual", "noasan", # times out b/63678675 "nomsan", + "notsan", # b/69374301 ], ) -- GitLab From d79dd4993061670c1ec5ea01db3022f28d72d0a3 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 17 Nov 2017 13:55:17 -0800 Subject: [PATCH 0067/1225] Fix shutdown race in ClusterSpec propagation. Previously, the DeregisterGraph and DeleteWorkerSession RPCs could race against each other, leading to undefined behavior. This change inhibits the unnecessary DeregisterGraph RPCs when DeleteWorkerSession is being used, which both fixes the race and cuts down on unnecessary network traffic on the Session::Close path. 
PiperOrigin-RevId: 176155626 --- .../core/distributed_runtime/master_session.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index 91a1fa7d1e..b3e499be79 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -67,13 +67,14 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { const SessionOptions& session_opts, const StatsPublisherFactory& stats_publisher_factory, GraphExecutionState* execution_state, bool is_partial, - WorkerCacheInterface* worker_cache) + WorkerCacheInterface* worker_cache, bool should_deregister) : session_handle_(handle), client_graph_(std::move(cg)), session_opts_(session_opts), is_partial_(is_partial), debug_opts_(bopts.debug_options), - worker_cache_(worker_cache) { + worker_cache_(worker_cache), + should_deregister_(should_deregister) { VLOG(1) << "Created ReffedClientGraph for node with " << client_graph()->graph.num_node_ids(); @@ -85,7 +86,11 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { } } - ~ReffedClientGraph() override { DeregisterPartitions(); } + ~ReffedClientGraph() override { + if (should_deregister_) { + DeregisterPartitions(); + } + } const ClientGraph* client_graph() { return client_graph_.get(); } @@ -209,6 +214,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { const DebugOptions& debug_opts_; WorkerCacheInterface* const worker_cache_; // Not owned. std::unordered_map name_to_node_; + const bool should_deregister_; // Graph partitioned into per-location subgraphs. 
struct Part { @@ -1262,7 +1268,7 @@ Status MasterSession::StartStep(const BuildGraphOptions& opts, int64* count, auto entry = new ReffedClientGraph( handle_, opts, std::move(client_graph), session_opts_, stats_publisher_factory_, execution_state_.get(), is_partial, - worker_cache); + worker_cache, !should_delete_worker_sessions_); iter = m->insert({hash, entry}).first; VLOG(1) << "Preparing to execute new graph"; } -- GitLab From 3f888e1539db5551cfcf9ee837a0555c224e0018 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 14:16:09 -0800 Subject: [PATCH 0068/1225] Add a Compiler::BuildExecutable interface that compiles the given Hlo module without optimizations. PiperOrigin-RevId: 176158846 --- tensorflow/compiler/xla/service/compiler.h | 17 +++++++++--- .../compiler/xla/service/cpu/cpu_compiler.cc | 26 ++++++++++++------- .../compiler/xla/service/cpu/cpu_compiler.h | 6 ++++- .../compiler/xla/service/gpu/gpu_compiler.cc | 22 +++++++++------- .../compiler/xla/service/gpu/gpu_compiler.h | 6 ++++- tensorflow/compiler/xla/service/hlo_runner.cc | 19 +++++++++----- tensorflow/compiler/xla/service/hlo_runner.h | 15 +++++++---- .../xla/service/interpreter/compiler.cc | 12 +++++++-- .../xla/service/interpreter/compiler.h | 8 ++++-- .../compiler/xla/service/llvm_compiler.cc | 4 ++- .../compiler/xla/service/llvm_compiler.h | 12 ++++++--- tensorflow/compiler/xla/service/service.cc | 5 +++- .../compiler/xla/tests/codegen_test_base.cc | 7 +++-- .../compiler/xla/tests/llvm_compiler_test.cc | 4 +-- 14 files changed, 114 insertions(+), 49 deletions(-) diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 5f021900c8..fc67330f5c 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -97,21 +97,32 @@ class Compiler { // Returns the ID of the platform that this compiler targets. 
virtual perftools::gputools::Platform::Id PlatformId() const = 0; + // Runs Hlo passes to optimize the given Hlo module, returns the optimized + // module. + virtual StatusOr> RunHloPasses( + std::unique_ptr module, + perftools::gputools::StreamExecutor* executor) = 0; + // Compiles the HLO module for execution on a device given by the executor, - // and returns an executable object or an error status. Takes ownership of the - // HLO module and is free to transform it. + // and returns an executable object or an error status. No HLO passes are + // applied to module. Generally a module should be passed through RunHloPasses + // prior to calling this method because some HLO passes are required for + // correctness. Takes ownership of the HLO module and is free to transform it. // // The compiler may optionally specialize to the individual device // (not just type of device) indicated by the executor. // // Use the overload below to compile computations that run in parallel. - virtual StatusOr> Compile( + virtual StatusOr> RunBackend( std::unique_ptr module, perftools::gputools::StreamExecutor* executor) = 0; // Compiles a set of HLO modules that can run in parallel, potentially // communicating data between the modules, and returns a corresponding // sequence of executable objects. + // + // TODO(b/68666782): Remove this method after adding support for multiple + // modules to RunHloPasses and RunBackends. 
virtual StatusOr>> Compile( std::vector> modules, std::vector> diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index f5b95d3657..b04a279395 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -426,8 +426,22 @@ Status InitializeModuleHooks( } // namespace -StatusOr> CpuCompiler::Compile( - std::unique_ptr module, se::StreamExecutor* stream_exec) { +StatusOr> CpuCompiler::RunHloPasses( + std::unique_ptr module, + perftools::gputools::StreamExecutor* /*stream_exec*/) { + VLOG(2) << "Before optimization:"; + XLA_VLOG_LINES(2, module->ToString()); + + TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false)); + + VLOG(2) << "After optimization:"; + XLA_VLOG_LINES(2, module->ToString()); + return std::move(module); +} + +StatusOr> CpuCompiler::RunBackend( + std::unique_ptr module, + perftools::gputools::StreamExecutor* stream_exec) { const string timer_message = "Compiling [" + module->name() + "] for CPU using JIT"; ScopedLoggingTimer compiling_timer(timer_message, 1); @@ -458,14 +472,6 @@ StatusOr> CpuCompiler::Compile( llvm_module->setDataLayout(jit->data_layout()); llvm_module->setTargetTriple(jit->target_triple().getTriple()); - VLOG(2) << "Before optimization:"; - XLA_VLOG_LINES(2, module->ToString()); - - TF_RETURN_IF_ERROR(RunHloPasses(module.get(), /*is_aot_compile=*/false)); - - VLOG(2) << "After optimization:"; - XLA_VLOG_LINES(2, module->ToString()); - HloComputation* computation = module->entry_computation(); std::unordered_map hlo_to_profile_idx; if (module->config().hlo_profiling_enabled()) { diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h index 963aced208..ebed7058d8 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h @@ -116,7 +116,11 @@ class CpuCompiler : 
public LLVMCompiler { // stream_execs) using LLVMCompiler::Compile; - StatusOr> Compile( + StatusOr> RunHloPasses( + std::unique_ptr module, + perftools::gputools::StreamExecutor* stream_exec) override; + + StatusOr> RunBackend( std::unique_ptr module, perftools::gputools::StreamExecutor* stream_exec) override; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 23fb308ec6..937d453a5c 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -126,7 +126,7 @@ string GetLibdeviceDir(const string& config_cuda_data_dir) { // Runs optimization passes on the given HLO module. tensorflow::Status OptimizeHloModule( - HloModule* hlo_module, const se::DeviceDescription& device_desc, + HloModule* hlo_module, const HloCostAnalysis::ShapeSizeFunction& shape_size_function) { { HloPassPipeline pipeline("optimization"); @@ -297,19 +297,23 @@ StatusOr> CompilePtx(const string& ptx, int cc_major, GpuCompiler::GpuCompiler() : pointer_size_(llvm::DataLayout(kDataLayout).getPointerSize()) {} -StatusOr> GpuCompiler::Compile( - std::unique_ptr module, se::StreamExecutor* stream_exec) { - TF_RET_CHECK(stream_exec != nullptr); - +StatusOr> GpuCompiler::RunHloPasses( + std::unique_ptr module, se::StreamExecutor* /*stream_exec*/) { { Tracing::TraceMe annotation("HLO Transforms", module->name(), /*is_expensive=*/true); - TF_RETURN_IF_ERROR(OptimizeHloModule(module.get(), - stream_exec->GetDeviceDescription(), - ShapeSizeBytesFunction())); TF_RETURN_IF_ERROR( - PrepareHloModuleForIrEmitting(module.get(), ShapeSizeBytesFunction())); + OptimizeHloModule(module.get(), ShapeSizeBytesFunction())); } + return std::move(module); +} + +StatusOr> GpuCompiler::RunBackend( + std::unique_ptr module, se::StreamExecutor* stream_exec) { + TF_RET_CHECK(stream_exec != nullptr); + + TF_RETURN_IF_ERROR( + PrepareHloModuleForIrEmitting(module.get(), 
ShapeSizeBytesFunction())); llvm::LLVMContext llvm_context; std::string buffer; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h index fe5fce615f..18e3434020 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.h @@ -49,7 +49,11 @@ class GpuCompiler : public LLVMCompiler { // stream_execs) using LLVMCompiler::Compile; - StatusOr> Compile( + StatusOr> RunHloPasses( + std::unique_ptr module, + perftools::gputools::StreamExecutor* stream_exec) override; + + StatusOr> RunBackend( std::unique_ptr module, perftools::gputools::StreamExecutor* stream_exec) override; diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 63f2b1296e..6b6d48233a 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -114,11 +114,16 @@ HloRunner::~HloRunner() { StatusOr HloRunner::Execute( std::unique_ptr module, tensorflow::gtl::ArraySlice arguments, - Shape* result_shape) { + Shape* result_shape, bool run_hlo_passes) { + if (run_hlo_passes) { + TF_ASSIGN_OR_RETURN( + module, backend().compiler()->RunHloPasses( + std::move(module), backend().default_stream_executor())); + } TF_ASSIGN_OR_RETURN( std::unique_ptr executable, - backend().compiler()->Compile(std::move(module), - backend().default_stream_executor())); + backend().compiler()->RunBackend(std::move(module), + backend().default_stream_executor())); se::Stream stream(backend().default_stream_executor()); stream.Init(); @@ -193,10 +198,12 @@ StatusOr> HloRunner::TransferFromDevice( StatusOr> HloRunner::ExecuteAndTransfer( std::unique_ptr module, - tensorflow::gtl::ArraySlice arguments) { + tensorflow::gtl::ArraySlice arguments, + bool run_hlo_passes) { Shape result_shape; - TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase device_base, - Execute(std::move(module), arguments, &result_shape)); + 
TF_ASSIGN_OR_RETURN( + se::DeviceMemoryBase device_base, + Execute(std::move(module), arguments, &result_shape, run_hlo_passes)); return TransferFromDevice(result_shape, device_base); } diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index a5732848c6..95cddafc91 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -65,17 +65,20 @@ class HloRunner { // Executes the given module with given literals as input and returns the // result as a Literal. The LiteralPtr type accepts Literal* or // std::unique_ptr. + // If run_hlo_passes is false, the module will be executed without Hlo + // optimization. template StatusOr> Execute( std::unique_ptr module, - const tensorflow::gtl::ArraySlice literals); + const tensorflow::gtl::ArraySlice literals, + bool run_hlo_passes = true); // Executes the given module and returns a global data handle. StatusOr Execute( std::unique_ptr module, tensorflow::gtl::ArraySlice arguments, - Shape* result_shape); + Shape* result_shape, bool run_hlo_passes = true); // Transfers the given literal to the device and returns the data handle. StatusOr TransferToDevice( @@ -90,7 +93,8 @@ class HloRunner { StatusOr> ExecuteAndTransfer( std::unique_ptr module, tensorflow::gtl::ArraySlice - arguments); + arguments, + bool run_hlo_passes = true); // If backend is not created in the constructor, creates and returns the // default backend. If creation fails, crashes the program. 
@@ -112,14 +116,15 @@ class HloRunner { template StatusOr> HloRunner::Execute( std::unique_ptr module, - const tensorflow::gtl::ArraySlice literals) { + const tensorflow::gtl::ArraySlice literals, + bool run_hlo_passes) { std::vector arguments; for (const auto& literal : literals) { TF_ASSIGN_OR_RETURN(perftools::gputools::DeviceMemoryBase argument, TransferToDevice(*literal)); arguments.push_back(argument); } - return ExecuteAndTransfer(std::move(module), arguments); + return ExecuteAndTransfer(std::move(module), arguments, run_hlo_passes); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc index 6d5796a24b..c9a5285a4f 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.cc +++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc @@ -69,11 +69,19 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) { return pipeline.Run(hlo_module).status(); } -StatusOr> InterpreterCompiler::Compile( +StatusOr> InterpreterCompiler::RunHloPasses( + std::unique_ptr hlo_module, + se::StreamExecutor* /*stream_exec*/) { + VLOG(1) << "Run hlo passes on graph " << hlo_module->name(); + TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get())); + return std::move(hlo_module); +} + +StatusOr> InterpreterCompiler::RunBackend( std::unique_ptr hlo_module, se::StreamExecutor* stream_exec) { TF_RET_CHECK(stream_exec != nullptr); - VLOG(1) << "Generate graph " << hlo_module->name(); + VLOG(1) << "Run backend " << hlo_module->name(); TF_RETURN_IF_ERROR(RunHloOptimization(hlo_module.get())); diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h index cfdc9b6256..278cf51842 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.h +++ b/tensorflow/compiler/xla/service/interpreter/compiler.h @@ -43,8 +43,12 @@ class InterpreterCompiler : public Compiler { InterpreterCompiler() {} 
~InterpreterCompiler() override {} - StatusOr> Compile( - std::unique_ptr hlo_modules, + StatusOr> RunHloPasses( + std::unique_ptr hlo_module, + perftools::gputools::StreamExecutor* stream_exec) override; + + StatusOr> RunBackend( + std::unique_ptr hlo_module, perftools::gputools::StreamExecutor* stream_exec) override; StatusOr>> Compile( diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc index ba0304fb8c..34f3419269 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.cc +++ b/tensorflow/compiler/xla/service/llvm_compiler.cc @@ -27,8 +27,10 @@ StatusOr>> LLVMCompiler::Compile( "Model partitioning not implemented for the CPU/GPU compilers!"); } + TF_ASSIGN_OR_RETURN( + modules[i], RunHloPasses(std::move(modules[i]), stream_execs[i][0])); TF_ASSIGN_OR_RETURN(std::unique_ptr executable, - Compile(std::move(modules[i]), stream_execs[i][0])); + RunBackend(std::move(modules[i]), stream_execs[i][0])); result.push_back(std::move(executable)); } diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h index c4f689eabe..c5393cef4f 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.h +++ b/tensorflow/compiler/xla/service/llvm_compiler.h @@ -58,10 +58,14 @@ class LLVMCompiler : public Compiler { void RemovePostOptimizationHook() { user_post_optimization_hook_ = nullptr; } // Bring in - // StatusOr> Compile( - // std::unique_ptr module, - // perftools::gputools::StreamExecutor* executor) - using Compiler::Compile; + // StatusOr> RunBackend( + // std::unique_ptr module, + // perftools::gputools::StreamExecutor* stream_exec) + // StatusOr> RunHloPasses( + // std::unique_ptr module, + // perftools::gputools::StreamExecutor* stream_exec) + using Compiler::RunBackend; + using Compiler::RunHloPasses; StatusOr>> Compile( std::vector> modules, diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 
ee9501dd48..0544a1697b 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -430,9 +430,12 @@ StatusOr> Service::BuildExecutable( /*include_unreachable_instructions=*/ true)); + TF_ASSIGN_OR_RETURN( + module, backend->compiler()->RunHloPasses(std::move(module), executor)); + TF_ASSIGN_OR_RETURN( std::unique_ptr executable, - backend->compiler()->Compile(std::move(module), executor)); + backend->compiler()->RunBackend(std::move(module), executor)); if (!other_directory_path.empty()) { executable->set_session_module(std::move(session_module)); diff --git a/tensorflow/compiler/xla/tests/codegen_test_base.cc b/tensorflow/compiler/xla/tests/codegen_test_base.cc index 43ea7f6019..e472408dcf 100644 --- a/tensorflow/compiler/xla/tests/codegen_test_base.cc +++ b/tensorflow/compiler/xla/tests/codegen_test_base.cc @@ -19,8 +19,11 @@ namespace xla { StatusOr> CodegenTestBase::CompileToExecutable( std::unique_ptr hlo_module) { - return backend().compiler()->Compile(std::move(hlo_module), - backend().default_stream_executor()); + TF_ASSIGN_OR_RETURN(hlo_module, backend().compiler()->RunHloPasses( + std::move(hlo_module), + backend().default_stream_executor())); + return backend().compiler()->RunBackend(std::move(hlo_module), + backend().default_stream_executor()); } StatusOr> diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc index 62fab6a224..b5b95967ff 100644 --- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc +++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc @@ -73,8 +73,8 @@ class LLVMCompilerTest : public ::testing::Test { compiler->SetPostOptimizationHook(post_opt_hook); ASSERT_TRUE(compiler - ->Compile(std::move(hlo_module), - backend_->default_stream_executor()) + ->RunBackend(std::move(hlo_module), + backend_->default_stream_executor()) .ok()); // Test that hooks were called. 
-- GitLab From 6610eb74981ffcbaaba6befc241ad6d34aded81e Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 17 Nov 2017 14:17:22 -0800 Subject: [PATCH 0069/1225] tfdbg: fix missing space in grpc error message PiperOrigin-RevId: 176159019 --- tensorflow/core/kernels/debug_ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/debug_ops.h b/tensorflow/core/kernels/debug_ops.h index 2c21053121..381add3fb3 100644 --- a/tensorflow/core/kernels/debug_ops.h +++ b/tensorflow/core/kernels/debug_ops.h @@ -185,7 +185,7 @@ class BaseDebugOp : public OpKernel { if (!status.ok()) { LOG(ERROR) << "Debug node of watch key " << debug_watch_key_->debug_node_name - << "failed to publish debug tensor data to all URLs " + << " failed to publish debug tensor data to all URLs " << str_util::Join(debug_urls_, ", ") << ", due to: " << status.error_message(); } -- GitLab From 61b0ddca2570215a625e22f76348f51ffd661ddf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 14:23:01 -0800 Subject: [PATCH 0070/1225] Modify QuantizeAddContexts so that ops are added deterministically. This is needed when using multiple worker replicas so that the ops can be initialized consistently. PiperOrigin-RevId: 176159819 --- tensorflow/contrib/quantize/python/quantize.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 7db2d863aa..50a2b4c91c 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -164,7 +164,10 @@ class _QuantizeContext(object): def QuantizeAddContexts(self): """Quantizes all add ops in self.add_contexts.""" - for add_context in self.add_contexts: + # Loop through sorted self.add_contexts so that op creation is + # deterministic. This is needed when using multiple worker replicas so that + # the ops can be initialized consistently. 
+ for add_context in sorted(self.add_contexts): add_op = self.GetOperationByNamesDontThrow([ add_context + '/Add', add_context + '/add']) if add_op is not None: -- GitLab From 3094dfcf387c122b678230b6c0df778aad594d1e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 14:36:39 -0800 Subject: [PATCH 0071/1225] [XLA:GPU] Partially enable Winograd convolution algorithm. Disable the algorithm for certain inputs to avoid a known bug in cuDNNv5 and cuDNNv6. PiperOrigin-RevId: 176161830 --- .../xla/service/gpu/convolution_thunk.cc | 37 +++++++++++++++---- .../xla/service/gpu/convolution_thunk.h | 1 + 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index e79d0a4c79..5fe5f55857 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -258,22 +258,19 @@ tensorflow::Status ConvolutionThunk::Convolve( } std::vector ConvolutionThunk::GetAlgorithms( - se::StreamExecutor* stream_exec) const { + bool with_winograd_nonfused, se::StreamExecutor* stream_exec) const { std::vector algorithms; - // TODO(yangzihao): Currently disable the use of winograd nonfused in XLA - // by default. Should send in conv parameters and enable it when - // ShouldIncludeWinogradNonfusedAlgo() returns true. 
switch (convolution_kind_) { case ConvolutionKind::kBackwardFilter: CHECK(stream_exec->GetConvolveBackwardFilterAlgorithms( - /*with_winograd_nonfused=*/false, &algorithms)); + with_winograd_nonfused, &algorithms)); break; case ConvolutionKind::kBackwardInput: CHECK(stream_exec->GetConvolveBackwardDataAlgorithms( - /*with_winograd_nonfused=*/false, &algorithms)); + with_winograd_nonfused, &algorithms)); break; case ConvolutionKind::kForward: - CHECK(stream_exec->GetConvolveAlgorithms(/*with_winograd_nonfused=*/false, + CHECK(stream_exec->GetConvolveAlgorithms(with_winograd_nonfused, &algorithms)); break; } @@ -287,6 +284,26 @@ static string AlgorithmToString(const se::dnn::AlgorithmDesc& algo) { return tensorflow::strings::StrCat(algo.algo_id()); } +// Determines whether we can safely perform a winograd non-fused convolution for +// the given input and output descriptors. This works around b/68264959, an +// integer overflow in cuDNNv5 and cuDNNv6. +static bool ShouldIncludeWinogradNonfusedAlgo( + const BatchDescriptor& input_descriptor, + const BatchDescriptor& output_descriptor) { + int64 batch = input_descriptor.count(); + int64 in_depths = input_descriptor.feature_map_count(); + int64 in_rows = input_descriptor.height(); + int64 in_cols = input_descriptor.width(); + int64 out_depths = output_descriptor.feature_map_count(); + + int64 total_size = 16 * std::ceil(batch / 16.0) * + std::max(in_depths, out_depths) * in_cols * in_rows * + sizeof(float); + int64 threshold = 1L << 31; + + return total_size < threshold; +} + tensorflow::Status ConvolutionThunk::ConvolveWithTune( const BatchDescriptor& input_descriptor, se::DeviceMemory input_data, const FilterDescriptor& filter_descriptor, @@ -303,9 +320,13 @@ tensorflow::Status ConvolutionThunk::ConvolveWithTune( "ConvolutionThunk: " << this; + bool with_winograd_nonfused = + ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor); + se::dnn::ProfileResult best_result; se::dnn::ProfileResult 
best_result_without_scratch; - std::vector algorithms = GetAlgorithms(stream->parent()); + std::vector algorithms = + GetAlgorithms(with_winograd_nonfused, stream->parent()); for (auto algorithm : algorithms) { ConvolveScratchAllocator scratch_allocator( buffer_allocations.device_ordinal(), diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h index 13432301b2..5ac5db2f04 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h @@ -116,6 +116,7 @@ class ConvolutionThunk : public Thunk { // Returns the convolve algorithms that can be used for this ConvolutionThunk. std::vector GetAlgorithms( + bool with_winograd_nonfused, perftools::gputools::StreamExecutor* stream_exec) const; // Fastest cuDNN convolution algorithm for this thunk learned from -- GitLab From 9b858b88784b6a9232d23d3a13353cd6ef43cd18 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 17 Nov 2017 14:53:59 -0800 Subject: [PATCH 0072/1225] Avoid reordering in ports in SwitchGrad for CondContext. PiperOrigin-RevId: 176164285 --- tensorflow/python/ops/control_flow_grad.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/ops/control_flow_grad.py b/tensorflow/python/ops/control_flow_grad.py index 3c082b19b6..22dc6771ec 100644 --- a/tensorflow/python/ops/control_flow_grad.py +++ b/tensorflow/python/ops/control_flow_grad.py @@ -69,13 +69,12 @@ def _SwitchGrad(op, *grad): # meaning the output is not differentiable. return None, None elif isinstance(op_ctxt, CondContext): - good_grad = grad[op_ctxt.branch] zero_grad = grad[1 - op_ctxt.branch] # At this point, we have created zero_grad guarded by the right switch. # Unfortunately, we may still get None here for not trainable data types. 
if zero_grad is None: return None, None - return merge([good_grad, zero_grad], name="cond_grad")[0], None + return merge(grad, name="cond_grad")[0], None else: false_grad = switch(grad[0], op.inputs[1])[0] true_grad = switch(grad[1], op.inputs[1])[1] -- GitLab From c86793dd597649fdf64964f87e6f8e896966e490 Mon Sep 17 00:00:00 2001 From: Rui Zhao Date: Fri, 17 Nov 2017 15:11:09 -0800 Subject: [PATCH 0073/1225] Register tile_ops GPU kernel for bool types. PiperOrigin-RevId: 176166731 --- tensorflow/core/kernels/tile_functor_gpu.cu.cc | 1 + tensorflow/core/kernels/tile_ops.cc | 14 ++++++++++++-- tensorflow/core/util/cuda_kernel_helper.h | 10 ++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/tile_functor_gpu.cu.cc b/tensorflow/core/kernels/tile_functor_gpu.cu.cc index 5a36e7567b..84a5060fc3 100644 --- a/tensorflow/core/kernels/tile_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/tile_functor_gpu.cu.cc @@ -90,6 +90,7 @@ typedef Eigen::GpuDevice GPUDevice; template struct Tile; \ template struct Tile; +TF_CALL_bool(DEFINE_TYPE); TF_CALL_int16(DEFINE_TYPE); TF_CALL_int32(DEFINE_TYPE); TF_CALL_int64(DEFINE_TYPE); diff --git a/tensorflow/core/kernels/tile_ops.cc b/tensorflow/core/kernels/tile_ops.cc index fa5afe6a31..68cdae3249 100644 --- a/tensorflow/core/kernels/tile_ops.cc +++ b/tensorflow/core/kernels/tile_ops.cc @@ -222,6 +222,7 @@ TF_CALL_complex128(HANDLE_TYPE_NAME_CPU); TF_CALL_string(HANDLE_TYPE_NAME_CPU); #if GOOGLE_CUDA +TF_CALL_bool(HANDLE_TYPE_NAME_GPU); TF_CALL_float(HANDLE_TYPE_NAME_GPU); TF_CALL_double(HANDLE_TYPE_NAME_GPU); TF_CALL_int16(HANDLE_TYPE_NAME_GPU); @@ -534,7 +535,7 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad") TileGradientOp); #if GOOGLE_CUDA -#define REGISTER_GPU(type) \ +#define REGISTER_GPU_TILE(type) \ REGISTER_KERNEL_BUILDER(Name("Tile") \ .Device(DEVICE_GPU) \ .TypeConstraint("T") \ @@ -546,7 +547,9 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad") .TypeConstraint("T") \ .TypeConstraint("Tmultiples") 
\ .HostMemory("multiples"), \ - TileOp); \ + TileOp); + +#define REGISTER_GPU_TILE_GRAD(type) \ REGISTER_KERNEL_BUILDER(Name("TileGrad") \ .Device(DEVICE_GPU) \ .TypeConstraint("T") \ @@ -560,6 +563,11 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad") .HostMemory("multiples"), \ TileGradientOp); +#define REGISTER_GPU(type) \ + REGISTER_GPU_TILE(type); \ + REGISTER_GPU_TILE_GRAD(type); + +TF_CALL_bool(REGISTER_GPU_TILE); TF_CALL_float(REGISTER_GPU); TF_CALL_double(REGISTER_GPU); TF_CALL_half(REGISTER_GPU); @@ -568,6 +576,8 @@ TF_CALL_int32(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU) +#undef REGISTER_GPU_TILE +#undef REGISTER_GPU_TILE_GRAD #undef REGISTER_GPU #endif // GOOGLE_CUDA diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h index 8315f208e7..8fa0dfbed9 100644 --- a/tensorflow/core/util/cuda_kernel_helper.h +++ b/tensorflow/core/util/cuda_kernel_helper.h @@ -374,6 +374,16 @@ __device__ __host__ inline Eigen::half ldg(const Eigen::half* address) { #endif } +template <> +__device__ __host__ inline bool ldg(const bool* address) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 + return *reinterpret_cast( + __ldg(reinterpret_cast(address))); +#else + return *address; +#endif +} + // CUDA provides atomic ops, but not for all types. We provide wrappers // for some ops and provide implementation for all reasonable types. #define CUDA_ATOMIC_WRAPPER(op, T) \ -- GitLab From 3cc43816cda27c497399bf94429b174db5ed6d6b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Nov 2017 15:19:17 -0800 Subject: [PATCH 0074/1225] Adds validation for categorical_columns in shared_embedding_columns. 
PiperOrigin-RevId: 176167775 --- .../python/feature_column/feature_column.py | 28 +++++++++++++--- .../feature_column/feature_column_test.py | 32 +++++++++++++++++++ 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 5ee93be7c3..a19636474b 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -689,12 +689,30 @@ def _shared_embedding_columns( raise ValueError('initializer must be callable if specified.') if initializer is None: initializer = init_ops.truncated_normal_initializer( - mean=0.0, stddev=1 / math.sqrt(dimension)) - # TODO(b/67952670): Validate categorical_columns. + mean=0.0, stddev=1. / math.sqrt(dimension)) + + # Sort the columns so the default collection name is deterministic even if the + # user passes columns from an unsorted collection, such as dict.values(). + sorted_columns = sorted(categorical_columns, key=lambda x: x.name) + + c0 = sorted_columns[0] + if not isinstance(c0, _CategoricalColumn): + raise ValueError( + 'All categorical_columns must be subclasses of _CategoricalColumn. ' + 'Given: {}, of type: {}'.format(c0, type(c0))) + if isinstance(c0, _WeightedCategoricalColumn): + c0 = c0.categorical_column + for c in sorted_columns[1:]: + if isinstance(c, _WeightedCategoricalColumn): + c = c.categorical_column + if not isinstance(c, type(c0)): + raise ValueError( + 'To use shared_embedding_column, all categorical_columns must have ' + 'the same type, or be weighted_categorical_column of the same type. ' + 'Given column: {} of type: {} does not match given column: {} of ' + 'type: {}'.format(c0, type(c0), c, type(c))) + if not shared_embedding_collection_name: - # Sort the columns so the name is deterministic even if the user passes - # columns from an unsorted collection, such as dict.values(). 
- sorted_columns = sorted(categorical_columns, key=lambda x: x.name) shared_embedding_collection_name = '_'.join(c.name for c in sorted_columns) shared_embedding_collection_name += '_shared_embedding' diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 9981f358b1..6ac5ce8757 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -4162,6 +4162,38 @@ class SharedEmbeddingColumnTest(test.TestCase): [categorical_column_a, categorical_column_b], dimension=2, initializer='not_fn') + def test_incompatible_column_type(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + categorical_column_c = fc.categorical_column_with_hash_bucket( + key='ccc', hash_bucket_size=3) + with self.assertRaisesRegexp( + ValueError, + 'all categorical_columns must have the same type.*' + '_IdentityCategoricalColumn.*_HashedCategoricalColumn'): + fc_lib._shared_embedding_columns( + [categorical_column_a, categorical_column_b, categorical_column_c], + dimension=2) + + def test_weighted_categorical_column_ok(self): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + weighted_categorical_column_a = fc.weighted_categorical_column( + categorical_column_a, weight_feature_key='aaa_weights') + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + weighted_categorical_column_b = fc.weighted_categorical_column( + categorical_column_b, weight_feature_key='bbb_weights') + fc_lib._shared_embedding_columns( + [weighted_categorical_column_a, categorical_column_b], dimension=2) + fc_lib._shared_embedding_columns( + [categorical_column_a, weighted_categorical_column_b], dimension=2) + fc_lib._shared_embedding_columns( + 
[weighted_categorical_column_a, weighted_categorical_column_b], + dimension=2) + def test_parse_example(self): a = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) -- GitLab From cb12ebe044ad8fb8515bc9d95d27c0ab19ec314b Mon Sep 17 00:00:00 2001 From: Anna R Date: Fri, 17 Nov 2017 15:20:49 -0800 Subject: [PATCH 0075/1225] Provide an option to use ApiDef instead of OpGenOverrides when generating C++ API. Also, updating UpdateDocs method to ApiDef to replace names in docs. PiperOrigin-RevId: 176167953 --- tensorflow/cc/BUILD | 27 +++ tensorflow/cc/framework/cc_op_gen.cc | 218 ++++++++++-------- tensorflow/cc/framework/cc_op_gen.h | 6 +- tensorflow/cc/framework/cc_op_gen_main.cc | 38 ++- tensorflow/cc/framework/cc_op_gen_test.cc | 195 ++++++++++++++++ tensorflow/contrib/cmake/tf_cc_ops.cmake | 2 +- tensorflow/core/BUILD | 6 +- tensorflow/core/api_def/api_test.cc | 9 + .../base_api/api_def_ApplyAddSign.pbtxt | 65 ++++++ .../base_api/api_def_ApplyPowerSign.pbtxt | 65 ++++++ .../api_def_BytesProducedStatsDataset.pbtxt | 4 + .../base_api/api_def_DeserializeSparse.pbtxt | 19 ++ .../api_def_GenerateVocabRemapping.pbtxt | 13 +- .../api_def_IteratorSetStatsAggregator.pbtxt | 4 + .../api_def_LatencyStatsDataset.pbtxt | 4 + .../base_api/api_def_MatrixExponential.pbtxt | 32 +++ .../api_def/base_api/api_def_NthElement.pbtxt | 2 +- .../api_def_ResourceApplyAddSign.pbtxt | 59 +++++ .../api_def_ResourceApplyPowerSign.pbtxt | 59 +++++ .../api_def_StatsAggregatorHandle.pbtxt | 4 + .../api_def_StatsAggregatorSummary.pbtxt | 4 + .../base_api/api_def_TensorArrayV3.pbtxt | 11 + .../api_def_DeserializeSparse.pbtxt | 4 + .../api_def_MatrixExponential.pbtxt | 4 + tensorflow/core/framework/op_gen_lib.cc | 60 +++++ tensorflow/core/framework/op_gen_lib.h | 6 + tensorflow/core/framework/op_gen_lib_test.cc | 57 +++++ tensorflow/tensorflow.bzl | 31 ++- 28 files changed, 894 insertions(+), 114 deletions(-) create mode 100644 
tensorflow/cc/framework/cc_op_gen_test.cc create mode 100644 tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ApplyPowerSign.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BytesProducedStatsDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_DeserializeSparse.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_IteratorSetStatsAggregator.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LatencyStatsDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceApplyAddSign.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceApplyPowerSign.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_StatsAggregatorHandle.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_StatsAggregatorSummary.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DeserializeSparse.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MatrixExponential.pbtxt diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index 80112f9b44..e354831d7d 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -421,6 +421,7 @@ tf_cc_test( tf_gen_op_wrappers_cc( name = "cc_ops", + api_def_srcs = ["//tensorflow/core:base_api_def"], op_lib_names = [ "array_ops", "audio_ops", @@ -525,6 +526,30 @@ cc_library_with_android_deps( "//tensorflow/core:android_tensorflow_lib", ], copts = tf_copts(), + data = [ + "//tensorflow/core:base_api_def", + ], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:op_gen_lib", + "//tensorflow/core:op_gen_overrides_proto_cc", + "//tensorflow/core:proto_text", + "//tensorflow/core:protos_all_cc", + ], +) + +tf_cc_test( + name = "cc_op_gen_test", + srcs = [ + "framework/cc_op_gen.cc", + 
"framework/cc_op_gen.h", + "framework/cc_op_gen_test.cc", + ], + data = [ + "//tensorflow/cc:ops/op_gen_overrides.pbtxt", + ], deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -533,6 +558,8 @@ cc_library_with_android_deps( "//tensorflow/core:op_gen_overrides_proto_cc", "//tensorflow/core:proto_text", "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", ], ) diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index 38a17598b8..6f2b7acb82 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -18,8 +18,10 @@ limitations under the License. #include #include "tensorflow/cc/framework/cc_op_gen.h" +#include "tensorflow/core/framework/api_def.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/op_def_util.h" #include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/framework/op_gen_overrides.pb.h" #include "tensorflow/core/framework/tensor.pb.h" @@ -385,10 +387,10 @@ bool ArgIsList(const OpDef::ArgDef& arg) { } bool HasOptionalAttrs( - const OpDef& op_def, + const ApiDef& api_def, const std::unordered_map& inferred_input_attrs) { - for (int i = 0; i < op_def.attr_size(); ++i) { - const auto& attr(op_def.attr(i)); + for (int i = 0; i < api_def.attr_size(); ++i) { + const auto& attr(api_def.attr(i)); if ((inferred_input_attrs.find(attr.name()) == inferred_input_attrs.end()) && attr.has_default_value()) { @@ -398,12 +400,21 @@ bool HasOptionalAttrs( return false; } +const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def) { + for (int i = 0; i < api_def.in_arg_size(); ++i) { + if (api_def.in_arg(i).name() == name) { + return &api_def.in_arg(i); + } + } + return nullptr; +} + struct OpInfo { // graph_op_def: The OpDef used by the runtime, has the names that // must be used when calling NodeBuilder. 
// interface_op_def: The OpDef used in the interface in the generated // code, with possibly overridden names and defaults. - explicit OpInfo(const OpDef& graph_op_def, const OpDef& inteface_op_def, + explicit OpInfo(const OpDef& graph_op_def, const ApiDef& api_def, const std::vector& aliases); string GetOpAttrStruct() const; string GetConstructorDecl(StringPiece op_name_prefix, @@ -423,74 +434,81 @@ struct OpInfo { string comment; const OpDef& graph_op_def; - const OpDef& op_def; + const ApiDef& api_def; const std::vector& aliases; + // Map from type attribute to corresponding original argument name. std::unordered_map inferred_input_attrs; }; -OpInfo::OpInfo(const OpDef& g_op_def, const OpDef& i_op_def, - const std::vector& a) - : graph_op_def(g_op_def), op_def(i_op_def), aliases(a) { - op_name = op_def.name(); - InferOpAttributes(op_def, &inferred_input_attrs); - has_optional_attrs = HasOptionalAttrs(op_def, inferred_input_attrs); +OpInfo::OpInfo(const OpDef& graph_op_def, const ApiDef& api_def, + const std::vector& aliases) + : graph_op_def(graph_op_def), api_def(api_def), aliases(aliases) { + op_name = api_def.endpoint(0).name(); + InferOpAttributes(graph_op_def, &inferred_input_attrs); + has_optional_attrs = HasOptionalAttrs(api_def, inferred_input_attrs); arg_types.push_back("const ::tensorflow::Scope&"); arg_names.push_back("scope"); - if (op_def.has_deprecation()) { - if (!op_def.summary().empty()) { - comment = strings::StrCat(op_def.summary(), "\n"); + if (graph_op_def.has_deprecation()) { + if (!api_def.summary().empty()) { + comment = strings::StrCat(api_def.summary(), "\n"); } strings::StrAppend(&comment, "DEPRECATED at GraphDef version ", - op_def.deprecation().version(), ":\n", - op_def.deprecation().explanation(), ".\n"); - } else if (op_def.summary().empty()) { + graph_op_def.deprecation().version(), ":\n", + graph_op_def.deprecation().explanation(), ".\n"); + } else if (api_def.summary().empty()) { comment = "TODO: add doc.\n"; } else { - comment 
= strings::StrCat(op_def.summary(), "\n"); + comment = strings::StrCat(api_def.summary(), "\n"); } - if (!op_def.description().empty()) { - strings::StrAppend(&comment, "\n", op_def.description(), "\n"); + if (!api_def.description().empty()) { + strings::StrAppend(&comment, "\n", api_def.description(), "\n"); } strings::StrAppend(&comment, "\nArguments:\n* scope: A Scope object\n"); // Process inputs - for (int i = 0; i < op_def.input_arg_size(); ++i) { - const auto& arg(op_def.input_arg(i)); + for (int i = 0; i < api_def.arg_order_size(); ++i) { + const auto& arg = *FindInputArg(api_def.arg_order(i), graph_op_def); + const auto& api_def_arg = *FindInputArg(api_def.arg_order(i), api_def); arg_types.push_back(strings::StrCat( "::tensorflow::", ArgIsList(arg) ? "InputList" : "Input")); - arg_names.push_back(AvoidCPPKeywords(arg.name())); + arg_names.push_back(AvoidCPPKeywords(api_def_arg.rename_to())); // TODO(keveman): Include input type information. - StringPiece description = arg.description(); + StringPiece description = api_def_arg.description(); if (!description.empty()) { ConsumeEquals(&description); - strings::StrAppend(&comment, "* ", AvoidCPPKeywords(arg.name()), ": ", - arg.description(), "\n"); + strings::StrAppend(&comment, "* ", + AvoidCPPKeywords(api_def_arg.rename_to()), ": ", + api_def_arg.description(), "\n"); } } // Process attrs string required_attrs_comment; string optional_attrs_comment; - for (int i = 0; i < op_def.attr_size(); ++i) { - const auto& attr(op_def.attr(i)); + for (int i = 0; i < graph_op_def.attr_size(); ++i) { + // ApiDef attributes must be in the same order as in OpDef since + // we initialize ApiDef based on OpDef. 
+ const auto& attr(graph_op_def.attr(i)); + const auto& api_def_attr(api_def.attr(i)); + CHECK_EQ(attr.name(), api_def_attr.name()); // Skip inferred arguments if (inferred_input_attrs.count(attr.name()) > 0) continue; const auto entry = AttrTypeName(attr.type()); const auto attr_type_name = entry.first; const bool use_const = entry.second; - string attr_name = AvoidCPPKeywords(attr.name()); + string attr_name = AvoidCPPKeywords(api_def_attr.rename_to()); string attr_comment; - if (!attr.description().empty()) { + if (!api_def_attr.description().empty()) { // TODO(keveman): Word wrap and indent this, to handle multi-line // descriptions. strings::StrAppend(&attr_comment, "* ", attr_name, ": ", - attr.description(), "\n"); + api_def_attr.description(), "\n"); } - if (attr.has_default_value()) { + if (api_def_attr.has_default_value()) { strings::StrAppend(&optional_attrs_comment, attr_comment); } else { strings::StrAppend(&required_attrs_comment, attr_comment); @@ -508,44 +526,49 @@ OpInfo::OpInfo(const OpDef& g_op_def, const OpDef& i_op_def, } // Process outputs - for (int i = 0; i < op_def.output_arg_size(); ++i) { - const auto& arg = op_def.output_arg(i); + for (int i = 0; i < graph_op_def.output_arg_size(); ++i) { + // ApiDef arguments must be in the same order as in OpDef since + // we initialize ApiDef based on OpDef. + const auto& arg = graph_op_def.output_arg(i); + const auto& api_def_arg(api_def.out_arg(i)); + CHECK_EQ(arg.name(), api_def_arg.name()); + bool is_list = ArgIsList(arg); output_types.push_back( strings::StrCat("::tensorflow::", is_list ? "OutputList" : "Output")); - output_names.push_back(AvoidCPPKeywords(arg.name())); + output_names.push_back(AvoidCPPKeywords(api_def_arg.rename_to())); is_list_output.push_back(is_list); } strings::StrAppend(&comment, "\nReturns:\n"); - if (op_def.output_arg_size() == 0) { // No outputs. + if (graph_op_def.output_arg_size() == 0) { // No outputs. 
strings::StrAppend(&comment, "* the created `Operation`\n"); - } else if (op_def.output_arg_size() == 1) { // One output + } else if (graph_op_def.output_arg_size() == 1) { // One output if (is_list_output[0]) { strings::StrAppend(&comment, "* `OutputList`: "); } else { strings::StrAppend(&comment, "* `Output`: "); } - if (op_def.output_arg(0).description().empty()) { - strings::StrAppend(&comment, "The ", op_def.output_arg(0).name(), + if (api_def.out_arg(0).description().empty()) { + strings::StrAppend(&comment, "The ", api_def.out_arg(0).name(), " tensor.\n"); } else { // TODO(josh11b): Word wrap this. - strings::StrAppend(&comment, op_def.output_arg(0).description(), "\n"); + strings::StrAppend(&comment, api_def.out_arg(0).description(), "\n"); } } else { // Multiple outputs. - for (int i = 0; i < op_def.output_arg_size(); ++i) { + for (int i = 0; i < graph_op_def.output_arg_size(); ++i) { if (is_list_output[i]) { strings::StrAppend(&comment, "* `OutputList`"); } else { strings::StrAppend(&comment, "* `Output`"); } strings::StrAppend(&comment, " ", output_names[i]); - if (op_def.output_arg(i).description().empty()) { + if (api_def.out_arg(i).description().empty()) { strings::StrAppend(&comment, "\n"); } else { // TODO(josh11b): Word wrap this. - strings::StrAppend(&comment, ": ", op_def.output_arg(i).description(), + strings::StrAppend(&comment, ": ", api_def.out_arg(i).description(), "\n"); } } @@ -564,19 +587,20 @@ string OpInfo::GetOpAttrStruct() const { string struct_fields; string setters; - for (int i = 0; i < op_def.attr_size(); ++i) { - const auto& attr(op_def.attr(i)); + for (int i = 0; i < graph_op_def.attr_size(); ++i) { + const auto& attr(graph_op_def.attr(i)); + const auto& api_def_attr(api_def.attr(i)); // If attr will be inferred or it doesn't have a default value, don't // add it to the struct. 
if ((inferred_input_attrs.find(attr.name()) != inferred_input_attrs.end()) || - !attr.has_default_value()) { + !api_def_attr.has_default_value()) { continue; } const auto entry = AttrTypeName(attr.type()); const auto attr_type_name = entry.first; const bool use_const = entry.second; - const string camel_case_name = ToCamelCase(attr.name()); + const string camel_case_name = ToCamelCase(api_def_attr.rename_to()); const string suffix = (camel_case_name == op_name || camel_case_name == "Attrs") ? "_" : ""; const string attr_func_def = @@ -584,22 +608,25 @@ string OpInfo::GetOpAttrStruct() const { attr_type_name, use_const ? "&" : ""); string attr_comment; - if (!attr.description().empty()) { - strings::StrAppend(&attr_comment, attr.description(), "\n\n"); + if (!api_def_attr.description().empty()) { + strings::StrAppend(&attr_comment, api_def_attr.description(), "\n\n"); } strings::StrAppend(&attr_comment, "Defaults to ", - SummarizeAttrValue(attr.default_value()), "\n"); + SummarizeAttrValue(api_def_attr.default_value()), "\n"); attr_comment = MakeComment(attr_comment, " "); strings::StrAppend(&setters, attr_comment); strings::StrAppend(&setters, " Attrs ", attr_func_def, " x) {\n"); strings::StrAppend(&setters, " Attrs ret = *this;\n"); - strings::StrAppend(&setters, " ret.", attr.name(), "_ = x;\n"); + strings::StrAppend(&setters, " ret.", api_def_attr.rename_to(), + "_ = x;\n"); strings::StrAppend(&setters, " return ret;\n }\n\n"); strings::StrAppend( - &struct_fields, " ", attr_type_name, " ", attr.name(), "_ = ", - PrintAttrValue(op_def.name(), attr.default_value()), ";\n"); + &struct_fields, " ", attr_type_name, " ", api_def_attr.rename_to(), + "_ = ", + PrintAttrValue(graph_op_def.name(), api_def_attr.default_value()), + ";\n"); } if (struct_fields.empty()) { @@ -676,17 +703,18 @@ void OpInfo::WriteClassDecl(WritableFile* h) const { // Add the static functions to set optional attrs if (has_optional_attrs) { strings::StrAppend(&class_decl, "\n"); - for (int i = 
0; i < op_def.attr_size(); ++i) { - const auto& attr(op_def.attr(i)); + for (int i = 0; i < graph_op_def.attr_size(); ++i) { + const auto& attr(graph_op_def.attr(i)); + const auto& api_def_attr(api_def.attr(i)); if ((inferred_input_attrs.find(attr.name()) != inferred_input_attrs.end()) || - !attr.has_default_value()) { + !api_def_attr.has_default_value()) { continue; } const auto entry = AttrTypeName(attr.type()); const auto attr_type_name = entry.first; const bool use_const = entry.second; - const string camel_case_name = ToCamelCase(attr.name()); + const string camel_case_name = ToCamelCase(api_def_attr.rename_to()); const string suffix = (camel_case_name == op_name || camel_case_name == "Attrs") ? "_" : ""; const string attr_func_def = strings::StrCat( @@ -726,11 +754,11 @@ void OpInfo::GetOutput(string* out) const { strings::StrCat("if (!", scope_str, ".ok()) return;"); // No outputs. - if (op_def.output_arg_size() == 0) { + if (graph_op_def.output_arg_size() == 0) { strings::StrAppend(out, " this->operation = Operation(ret);\n return;\n"); return; } - if (op_def.output_arg_size() == 1) { + if (graph_op_def.output_arg_size() == 1) { // One output, no need for NameRangeMap if (is_list_output[0]) { strings::StrAppend(out, @@ -752,7 +780,7 @@ void OpInfo::GetOutput(string* out) const { ".UpdateStatus(_status_);\n", " return;\n"); strings::StrAppend(out, " }\n\n"); - for (int i = 0; i < op_def.output_arg_size(); ++i) { + for (int i = 0; i < graph_op_def.output_arg_size(); ++i) { const string arg_range = strings::StrCat( "_outputs_range[\"", graph_op_def.output_arg(i).name(), "\"]"); if (is_list_output[i]) { @@ -776,11 +804,13 @@ string OpInfo::GetConstructorBody() const { strings::StrAppend(&body, " ", return_on_error, "\n"); - for (int i = 0; i < op_def.input_arg_size(); ++i) { - const auto& arg(op_def.input_arg(i)); - strings::StrAppend(&body, " auto _", arg.name(), " = ::tensorflow::ops::", - ArgIsList(arg) ? 
"AsNodeOutList" : "AsNodeOut", "(", - scope_str, ", ", AvoidCPPKeywords(arg.name()), ");\n"); + for (int i = 0; i < graph_op_def.input_arg_size(); ++i) { + const auto& arg(graph_op_def.input_arg(i)); + const auto& api_def_arg(api_def.in_arg(i)); + strings::StrAppend( + &body, " auto _", api_def_arg.rename_to(), " = ::tensorflow::ops::", + ArgIsList(arg) ? "AsNodeOutList" : "AsNodeOut", "(", scope_str, ", ", + AvoidCPPKeywords(api_def_arg.rename_to()), ");\n"); strings::StrAppend(&body, " ", return_on_error, "\n"); } @@ -791,19 +821,21 @@ string OpInfo::GetConstructorBody() const { &body, " auto builder = ::tensorflow::NodeBuilder(unique_name, \"", graph_op_def.name(), "\")\n"); const string spaces = " "; - for (int i = 0; i < op_def.input_arg_size(); ++i) { - const auto& arg(op_def.input_arg(i)); - strings::StrAppend(&body, spaces, ".Input(_", arg.name(), ")\n"); + for (int i = 0; i < api_def.in_arg_size(); ++i) { + const auto& arg(api_def.in_arg(i)); + strings::StrAppend(&body, spaces, ".Input(_", arg.rename_to(), ")\n"); } - for (int i = 0; i < op_def.attr_size(); ++i) { + for (int i = 0; i < api_def.attr_size(); ++i) { const auto& graph_attr(graph_op_def.attr(i)); - const auto& attr(op_def.attr(i)); - if (inferred_input_attrs.find(attr.name()) != inferred_input_attrs.end()) { + const auto& api_def_attr(api_def.attr(i)); + if (inferred_input_attrs.find(api_def_attr.name()) != + inferred_input_attrs.end()) { continue; } - const string attr_name = attr.has_default_value() - ? strings::StrCat("attrs.", attr.name(), "_") - : AvoidCPPKeywords(attr.name()); + const string attr_name = + api_def_attr.has_default_value() + ? 
strings::StrCat("attrs.", api_def_attr.rename_to(), "_") + : AvoidCPPKeywords(api_def_attr.rename_to()); strings::StrAppend(&body, spaces, ".Attr(\"", graph_attr.name(), "\", ", attr_name, ")\n"); } @@ -845,10 +877,10 @@ void OpInfo::WriteClassDef(WritableFile* cc) const { TF_CHECK_OK(cc->Append(class_def)); } -void WriteCCOp(const OpDef& graph_op_def, const OpDef& interface_op_def, +void WriteCCOp(const OpDef& graph_op_def, const ApiDef& api_def, const std::vector& aliases, WritableFile* h, WritableFile* cc) { - OpInfo op_info(graph_op_def, interface_op_def, aliases); + OpInfo op_info(graph_op_def, api_def, aliases); op_info.WriteClassDecl(h); op_info.WriteClassDef(cc); @@ -943,8 +975,9 @@ string MakeInternal(const string& fname) { } // namespace -void WriteCCOps(const OpList& ops, const string& dot_h_fname, - const string& dot_cc_fname, const string& overrides_fnames) { +void WriteCCOps(const OpList& ops, const ApiDefMap& api_def_map, + const string& dot_h_fname, const string& dot_cc_fname, + const string& overrides_fnames) { Env* env = Env::Default(); // Load the override map. @@ -984,24 +1017,23 @@ void WriteCCOps(const OpList& ops, const string& dot_h_fname, // code depends on it. if (graph_op_def.name() == "Const") continue; - // Incorporate overrides from override_map. - OpDef interface_op_def = graph_op_def; - const OpGenOverride* op_override = - override_map.ApplyOverride(&interface_op_def); + const auto* api_def = api_def_map.GetApiDef(graph_op_def.name()); + std::vector aliases; - if (op_override) { - if (op_override->skip()) continue; - aliases.assign(op_override->alias().begin(), op_override->alias().end()); - if (op_override->hide()) { - // Write hidden ops to _internal.h and _internal.cc. - WriteCCOp(graph_op_def, interface_op_def, aliases, internal_h.get(), - internal_cc.get()); - continue; - } + if (api_def->visibility() == ApiDef::SKIP) continue; + // First endpoint is canonical, the rest are aliases. 
+ for (int endpoint_i = 1; endpoint_i < api_def->endpoint_size(); + ++endpoint_i) { + aliases.push_back(api_def->endpoint(endpoint_i).name()); + } + if (api_def->visibility() == ApiDef::HIDDEN) { + // Write hidden ops to _internal.h and _internal.cc. + WriteCCOp(graph_op_def, *api_def, aliases, internal_h.get(), + internal_cc.get()); + continue; } - // This isn't a hidden op, write it to the main files. - WriteCCOp(graph_op_def, interface_op_def, aliases, h.get(), cc.get()); + WriteCCOp(graph_op_def, *api_def, aliases, h.get(), cc.get()); } FinishFiles(false, h.get(), cc.get(), op_header_guard); diff --git a/tensorflow/cc/framework/cc_op_gen.h b/tensorflow/cc/framework/cc_op_gen.h index fa5e004f03..cea2899014 100644 --- a/tensorflow/cc/framework/cc_op_gen.h +++ b/tensorflow/cc/framework/cc_op_gen.h @@ -17,13 +17,15 @@ limitations under the License. #define THIRD_PARTY_TENSORFLOW_CC_FRAMEWORK_CC_OP_GEN_H_ #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { /// Result is written to files dot_h and dot_cc. -void WriteCCOps(const OpList& ops, const string& dot_h_fname, - const string& dot_cc_fname, const string& overrides_fnames); +void WriteCCOps(const OpList& ops, const ApiDefMap& api_def_map, + const string& dot_h_fname, const string& dot_cc_fname, + const string& overrides_fnames); } // namespace tensorflow diff --git a/tensorflow/cc/framework/cc_op_gen_main.cc b/tensorflow/cc/framework/cc_op_gen_main.cc index 3b80cf993e..326d5668b8 100644 --- a/tensorflow/cc/framework/cc_op_gen_main.cc +++ b/tensorflow/cc/framework/cc_op_gen_main.cc @@ -16,7 +16,11 @@ limitations under the License. 
#include "tensorflow/cc/framework/cc_op_gen.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" #include "tensorflow/core/platform/types.h" @@ -24,10 +28,28 @@ namespace tensorflow { namespace { void PrintAllCCOps(const std::string& dot_h, const std::string& dot_cc, - const std::string& overrides_fnames, bool include_internal) { + const std::string& overrides_fnames, bool include_internal, + const std::vector& api_def_dirs) { OpList ops; OpRegistry::Global()->Export(include_internal, &ops); - WriteCCOps(ops, dot_h, dot_cc, overrides_fnames); + ApiDefMap api_def_map(ops); + if (!api_def_dirs.empty()) { + Env* env = Env::Default(); + // Only load files that correspond to "ops". + for (const auto& op : ops.op()) { + for (const auto& api_def_dir : api_def_dirs) { + const std::string api_def_file_pattern = + io::JoinPath(api_def_dir, "api_def_" + op.name() + ".pbtxt"); + if (env->FileExists(api_def_file_pattern).ok()) { + TF_CHECK_OK(api_def_map.LoadFile(env, api_def_file_pattern)); + } + } + } + } + + api_def_map.UpdateDocs(); + + WriteCCOps(ops, api_def_map, dot_h, dot_cc, overrides_fnames); } } // namespace @@ -35,18 +57,24 @@ void PrintAllCCOps(const std::string& dot_h, const std::string& dot_cc, int main(int argc, char* argv[]) { tensorflow::port::InitMain(argv[0], &argc, &argv); - if (argc != 5) { + // TODO(annarev): Update this file to no longer take op_gen_overrides.pbtxt + // as an argument. 
+ if (argc != 6) { for (int i = 1; i < argc; ++i) { fprintf(stderr, "Arg %d = %s\n", i, argv[i]); } fprintf(stderr, - "Usage: %s out.h out.cc overrides1.pbtxt,2.pbtxt include_internal\n" + "Usage: %s out.h out.cc overrides1.pbtxt,2.pbtxt include_internal " + "api_def_dirs1,api_def_dir2 ...\n" " include_internal: 1 means include internal ops\n", argv[0]); exit(1); } bool include_internal = tensorflow::StringPiece("1") == argv[4]; - tensorflow::PrintAllCCOps(argv[1], argv[2], argv[3], include_internal); + std::vector api_def_dirs = tensorflow::str_util::Split( + argv[5], ",", tensorflow::str_util::SkipEmpty()); + tensorflow::PrintAllCCOps(argv[1], argv[2], argv[3], include_internal, + api_def_dirs); return 0; } diff --git a/tensorflow/cc/framework/cc_op_gen_test.cc b/tensorflow/cc/framework/cc_op_gen_test.cc new file mode 100644 index 0000000000..0b7e720a5c --- /dev/null +++ b/tensorflow/cc/framework/cc_op_gen_test.cc @@ -0,0 +1,195 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/cc/framework/cc_op_gen.h" + +#include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/op_gen_lib.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +// TODO(annarev): Remove this op_gen_overrides.pbtxt reference. +// It is needed only because WriteCCOps takes it as an argument. +constexpr char kOverridesFnames[] = + "tensorflow/cc/ops/op_gen_overrides.pbtxt"; +constexpr char kBaseOpDef[] = R"( +op { + name: "Foo" + input_arg { + name: "images" + description: "Images to process." + } + input_arg { + name: "dim" + description: "Description for dim." + type: DT_FLOAT + } + output_arg { + name: "output" + description: "Description for output." + type: DT_FLOAT + } + attr { + name: "T" + type: "type" + description: "Type for images" + allowed_values { + list { + type: DT_UINT8 + type: DT_INT8 + } + } + default_value { + i: 1 + } + } + summary: "Summary for op Foo." + description: "Description for op Foo." +} +)"; + +void ExpectHasSubstr(StringPiece s, StringPiece expected) { + EXPECT_TRUE(s.contains(expected)) + << "'" << s << "' does not contain '" << expected << "'"; +} + +void ExpectDoesNotHaveSubstr(StringPiece s, StringPiece expected) { + EXPECT_FALSE(s.contains(expected)) + << "'" << s << "' contains '" << expected << "'"; +} + +void ExpectSubstrOrder(const string& s, const string& before, + const string& after) { + int before_pos = s.find(before); + int after_pos = s.find(after); + ASSERT_NE(std::string::npos, before_pos); + ASSERT_NE(std::string::npos, after_pos); + EXPECT_LT(before_pos, after_pos) + << before << " is not before " << after << " in " << s; +} + +// Runs WriteCCOps and stores output in (internal_)cc_file_path and +// (internal_)h_file_path. 
+void GenerateCcOpFiles(Env* env, const OpList& ops, + const ApiDefMap& api_def_map, string* h_file_text, + string* internal_h_file_text) { + const string& tmpdir = testing::TmpDir(); + + const auto h_file_path = io::JoinPath(tmpdir, "test.h"); + const auto cc_file_path = io::JoinPath(tmpdir, "test.cc"); + const auto internal_h_file_path = io::JoinPath(tmpdir, "test_internal.h"); + const auto internal_cc_file_path = io::JoinPath(tmpdir, "test_internal.cc"); + + WriteCCOps(ops, api_def_map, h_file_path, cc_file_path, kOverridesFnames); + + TF_ASSERT_OK(ReadFileToString(env, h_file_path, h_file_text)); + TF_ASSERT_OK( + ReadFileToString(env, internal_h_file_path, internal_h_file_text)); +} + +TEST(CcOpGenTest, TestVisibilityChangedToHidden) { + const string api_def = R"( +op { + graph_op_name: "Foo" + visibility: HIDDEN +} +)"; + Env* env = Env::Default(); + OpList op_defs; + protobuf::TextFormat::ParseFromString(kBaseOpDef, &op_defs); // NOLINT + ApiDefMap api_def_map(op_defs); + + string h_file_text, internal_h_file_text; + // Without ApiDef + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectHasSubstr(h_file_text, "class Foo"); + ExpectDoesNotHaveSubstr(internal_h_file_text, "class Foo"); + + // With ApiDef + TF_ASSERT_OK(api_def_map.LoadApiDef(api_def)); + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectHasSubstr(internal_h_file_text, "class Foo"); + ExpectDoesNotHaveSubstr(h_file_text, "class Foo"); +} + +TEST(CcOpGenTest, TestArgNameChanges) { + const string api_def = R"( +op { + graph_op_name: "Foo" + arg_order: "dim" + arg_order: "images" +} +)"; + Env* env = Env::Default(); + OpList op_defs; + protobuf::TextFormat::ParseFromString(kBaseOpDef, &op_defs); // NOLINT + + ApiDefMap api_def_map(op_defs); + string cc_file_text, h_file_text; + string internal_cc_file_text, internal_h_file_text; + // Without ApiDef + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + 
&internal_h_file_text); + ExpectSubstrOrder(h_file_text, "Input images", "Input dim"); + + // With ApiDef + TF_ASSERT_OK(api_def_map.LoadApiDef(api_def)); + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectSubstrOrder(h_file_text, "Input dim", "Input images"); +} + +TEST(CcOpGenTest, TestEndpoints) { + const string api_def = R"( +op { + graph_op_name: "Foo" + endpoint { + name: "Foo1" + } + endpoint { + name: "Foo2" + } +} +)"; + Env* env = Env::Default(); + OpList op_defs; + protobuf::TextFormat::ParseFromString(kBaseOpDef, &op_defs); // NOLINT + + ApiDefMap api_def_map(op_defs); + string cc_file_text, h_file_text; + string internal_cc_file_text, internal_h_file_text; + // Without ApiDef + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectHasSubstr(h_file_text, "class Foo {"); + ExpectDoesNotHaveSubstr(h_file_text, "class Foo1"); + ExpectDoesNotHaveSubstr(h_file_text, "class Foo2"); + + // With ApiDef + TF_ASSERT_OK(api_def_map.LoadApiDef(api_def)); + GenerateCcOpFiles(env, op_defs, api_def_map, &h_file_text, + &internal_h_file_text); + ExpectHasSubstr(h_file_text, "class Foo1"); + ExpectHasSubstr(h_file_text, "typedef Foo1 Foo2"); + ExpectDoesNotHaveSubstr(h_file_text, "class Foo {"); +} +} // namespace +} // namespace tensorflow diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index a5f5ae5478..45eeb11062 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -83,7 +83,7 @@ foreach(tf_cc_op_lib_name ${tf_cc_op_lib_names}) ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc ${cc_ops_target_dir}/${tf_cc_op_lib_name}_internal.h ${cc_ops_target_dir}/${tf_cc_op_lib_name}_internal.cc - COMMAND ${tf_cc_op_lib_name}_gen_cc ${cc_ops_target_dir}/${tf_cc_op_lib_name}.h ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc ${tensorflow_source_dir}/tensorflow/cc/ops/op_gen_overrides.pbtxt 
${cc_ops_include_internal} + COMMAND ${tf_cc_op_lib_name}_gen_cc ${cc_ops_target_dir}/${tf_cc_op_lib_name}.h ${cc_ops_target_dir}/${tf_cc_op_lib_name}.cc ${tensorflow_source_dir}/tensorflow/cc/ops/op_gen_overrides.pbtxt ${cc_ops_include_internal} ${tensorflow_source_dir}/tensorflow/core/api_def/base_api DEPENDS ${tf_cc_op_lib_name}_gen_cc create_cc_ops_header_dir ) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 01ddbfc2d4..ee14078496 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3371,7 +3371,7 @@ tf_cc_test( filegroup( name = "base_api_def", - data = glob(["api_def/base_api/*"]), + srcs = glob(["api_def/base_api/*"]), ) filegroup( @@ -3386,10 +3386,6 @@ tf_cc_test( ":base_api_def", "//tensorflow/cc:ops/op_gen_overrides.pbtxt", ], - tags = [ - "manual", - "notap", - ], deps = [ ":framework", ":framework_internal", diff --git a/tensorflow/core/api_def/api_test.cc b/tensorflow/core/api_def/api_test.cc index f222d345ab..2cdc14843f 100644 --- a/tensorflow/core/api_def/api_test.cc +++ b/tensorflow/core/api_def/api_test.cc @@ -221,9 +221,18 @@ std::unordered_map GenerateApiDef( std::unordered_map api_defs_map; + // These ops are included in OpList only if TF_NEED_GCP + // is set to true. So, we skip them for now so that this test passes + // whether TF_NEED_GCP is set or not. 
+ const std::unordered_set ops_to_exclude = { + "BigQueryReader", "GenerateBigQueryReaderPartitions"}; for (const auto& op : ops.op()) { CHECK(!op.name().empty()) << "Encountered empty op name: %s" << op.DebugString(); + if (ops_to_exclude.find(op.name()) != ops_to_exclude.end()) { + LOG(INFO) << "Skipping " << op.name(); + continue; + } string file_path = io::JoinPath(api_def_dir, kApiDefFileFormat); file_path = strings::Printf(file_path.c_str(), op.name().c_str()); ApiDef* api_def = api_defs_map[file_path].add_op(); diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt new file mode 100644 index 0000000000..dd46095252 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ApplyAddSign.pbtxt @@ -0,0 +1,65 @@ +op { + graph_op_name: "ApplyAddSign" + in_arg { + name: "var" + description: <